! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on colab
Layers
#|default_exp layers
#|default_cls_lvl 3
from __future__ import annotations
from fastai.imports import *
from fastai.torch_imports import *
from fastai.torch_core import *
from torch.nn.utils import weight_norm, spectral_norm
from nbdev.showdoc import *
Custom fastai layers and basic functions to grab them.
Basic manipulations and resize
def module(*flds, **defaults):
"Decorator to create an `nn.Module` using `f` as `forward` method"
    pa = [inspect.Parameter(o, inspect.Parameter.POSITIONAL_OR_KEYWORD) for o in flds]
    pb = [inspect.Parameter(k, inspect.Parameter.POSITIONAL_OR_KEYWORD, default=v)
          for k,v in defaults.items()]
    params = pa+pb
    all_flds = [*flds,*defaults.keys()]

    def _f(f):
        class c(nn.Module):
            def __init__(self, *args, **kwargs):
                super().__init__()
                for i,o in enumerate(args): kwargs[all_flds[i]] = o
                kwargs = merge(defaults,kwargs)
                for k,v in kwargs.items(): setattr(self,k,v)
            __repr__ = basic_repr(all_flds)
            forward = f
        c.__signature__ = inspect.Signature(params)
        c.__name__ = c.__qualname__ = f.__name__
        c.__doc__ = f.__doc__
        return c
return _f
@module()
def Identity(self, x):
"Do nothing at all"
return x
test_eq(Identity()(1), 1)
@module('func')
def Lambda(self, x):
"An easy way to create a pytorch layer for a simple `func`"
return self.func(x)
def _add2(x): return x+2
tst = Lambda(_add2)
x = torch.randn(10,20)
test_eq(tst(x), x+2)
tst2 = pickle.loads(pickle.dumps(tst))
test_eq(tst2(x), x+2)
tst
Lambda(func=<function _add2>)
class PartialLambda(Lambda):
"Layer that applies `partial(func, **kwargs)`"
def __init__(self, func, **kwargs):
super().__init__(partial(func, **kwargs))
self.repr = f'{func.__name__}, {kwargs}'
def forward(self, x): return self.func(x)
def __repr__(self): return f'{self.__class__.__name__}({self.repr})'
def test_func(a,b=2): return a+b
tst = PartialLambda(test_func, b=5)
test_eq(tst(x), x+5)
@module(full=False)
def Flatten(self, x):
"Flatten `x` to a single dimension, e.g. at end of a model. `full` for rank-1 tensor"
    return x.view(-1) if self.full else x.view(x.size(0), -1) # removed the cast to TensorBase

tst = Flatten()
x = torch.randn(10,5,4)
test_eq(tst(x).shape, [10,20])
tst = Flatten(full=True)
test_eq(tst(x).shape, [200])
@module(tensor_cls=TensorBase)
def ToTensorBase(self, x):
"Convert x to TensorBase class"
return self.tensor_cls(x)
ttb = ToTensorBase()
timg = TensorImage(torch.rand(1,3,32,32))
test_eq(type(ttb(timg)), TensorBase)
class View(Module):
"Reshape `x` to `size`"
def __init__(self, *size): self.size = size
def forward(self, x): return x.view(self.size)
tst = View(10,5,4)
test_eq(tst(x).shape, [10,5,4])
class ResizeBatch(Module):
"Reshape `x` to `size`, keeping batch dim the same size"
def __init__(self, *size): self.size = size
def forward(self, x): return x.view((x.size(0),) + self.size)
tst = ResizeBatch(5,4)
test_eq(tst(x).shape, [10,5,4])
@module()
def Debugger(self,x):
"A module to debug inside a model."
    set_trace()
    return x
def sigmoid_range(x, low, high):
"Sigmoid function with range `(low, high)`"
return torch.sigmoid(x) * (high - low) + low
test = tensor([-10.,0.,10.])
assert torch.allclose(sigmoid_range(test, -1, 2), tensor([-1.,0.5, 2.]), atol=1e-4, rtol=1e-4)
assert torch.allclose(sigmoid_range(test, -5, -1), tensor([-5.,-3.,-1.]), atol=1e-4, rtol=1e-4)
assert torch.allclose(sigmoid_range(test, 2, 4), tensor([2., 3., 4.]), atol=1e-4, rtol=1e-4)
@module('low','high')
def SigmoidRange(self, x):
"Sigmoid module with range `(low, high)`"
return sigmoid_range(x, self.low, self.high)
tst = SigmoidRange(-1, 2)
assert torch.allclose(tst(test), tensor([-1.,0.5, 2.]), atol=1e-4, rtol=1e-4)
Pooling layers
class AdaptiveConcatPool1d(Module):
"Layer that concats `AdaptiveAvgPool1d` and `AdaptiveMaxPool1d`"
def __init__(self, size=None):
self.size = size or 1
self.ap = nn.AdaptiveAvgPool1d(self.size)
self.mp = nn.AdaptiveMaxPool1d(self.size)
def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
class AdaptiveConcatPool2d(Module):
"Layer that concats `AdaptiveAvgPool2d` and `AdaptiveMaxPool2d`"
def __init__(self, size=None):
self.size = size or 1
self.ap = nn.AdaptiveAvgPool2d(self.size)
self.mp = nn.AdaptiveMaxPool2d(self.size)
def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
If the input is `bs x nf x h x h`, the output will be `bs x 2*nf x 1 x 1` if no size is passed, or `bs x 2*nf x size x size` otherwise.
tst = AdaptiveConcatPool2d()
x = torch.randn(10,5,4,4)
test_eq(tst(x).shape, [10,10,1,1])
max1 = torch.max(x, dim=2, keepdim=True)[0]
maxp = torch.max(max1, dim=3, keepdim=True)[0]
test_eq(tst(x)[:,:5], maxp)
test_eq(tst(x)[:,5:], x.mean(dim=[2,3], keepdim=True))
tst = AdaptiveConcatPool2d(2)
test_eq(tst(x).shape, [10,10,2,2])
class PoolType: Avg,Max,Cat = 'Avg','Max','Cat'
def adaptive_pool(pool_type):
return nn.AdaptiveAvgPool2d if pool_type=='Avg' else nn.AdaptiveMaxPool2d if pool_type=='Max' else AdaptiveConcatPool2d
class PoolFlatten(nn.Sequential):
"Combine `nn.AdaptiveAvgPool2d` and `Flatten`."
def __init__(self, pool_type=PoolType.Avg): super().__init__(adaptive_pool(pool_type)(1), Flatten())
tst = PoolFlatten()
test_eq(tst(x).shape, [10,5])
test_eq(tst(x), x.mean(dim=[2,3]))
BatchNorm layers
NormType = Enum('NormType', 'Batch BatchZero Weight Spectral Instance InstanceZero')
def _get_norm(prefix, nf, ndim=2, zero=False, **kwargs):
"Norm layer with `nf` features and `ndim` initialized depending on `norm_type`."
assert 1 <= ndim <= 3
    bn = getattr(nn, f"{prefix}{ndim}d")(nf, **kwargs)
    if bn.affine:
        bn.bias.data.fill_(1e-3)
        bn.weight.data.fill_(0. if zero else 1.)
    return bn
@delegates(nn.BatchNorm2d)
def BatchNorm(nf, ndim=2, norm_type=NormType.Batch, **kwargs):
"BatchNorm layer with `nf` features and `ndim` initialized depending on `norm_type`."
return _get_norm('BatchNorm', nf, ndim, zero=norm_type==NormType.BatchZero, **kwargs)
@delegates(nn.InstanceNorm2d)
def InstanceNorm(nf, ndim=2, norm_type=NormType.Instance, affine=True, **kwargs):
"InstanceNorm layer with `nf` features and `ndim` initialized depending on `norm_type`."
return _get_norm('InstanceNorm', nf, ndim, zero=norm_type==NormType.InstanceZero, affine=affine, **kwargs)
`kwargs` are passed to `nn.BatchNorm` and can be `eps`, `momentum`, `affine` and `track_running_stats`.
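For instance (a small example added here for illustration, not from the original notebook), `eps` and `momentum` can be tweaked through those `kwargs`:
tst = BatchNorm(15, eps=1e-3, momentum=0.01)
test_eq(tst.eps, 1e-3)
test_eq(tst.momentum, 0.01)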
tst = BatchNorm(15)
assert isinstance(tst, nn.BatchNorm2d)
test_eq(tst.weight, torch.ones(15))
tst = BatchNorm(15, norm_type=NormType.BatchZero)
test_eq(tst.weight, torch.zeros(15))
tst = BatchNorm(15, ndim=1)
assert isinstance(tst, nn.BatchNorm1d)
tst = BatchNorm(15, ndim=3)
assert isinstance(tst, nn.BatchNorm3d)

tst = InstanceNorm(15)
assert isinstance(tst, nn.InstanceNorm2d)
test_eq(tst.weight, torch.ones(15))
tst = InstanceNorm(15, norm_type=NormType.InstanceZero)
test_eq(tst.weight, torch.zeros(15))
tst = InstanceNorm(15, ndim=1)
assert isinstance(tst, nn.InstanceNorm1d)
tst = InstanceNorm(15, ndim=3)
assert isinstance(tst, nn.InstanceNorm3d)
If `affine` is false, the weight should be `None`.
test_eq(BatchNorm(15, affine=False).weight, None)
test_eq(InstanceNorm(15, affine=False).weight, None)
class BatchNorm1dFlat(nn.BatchNorm1d):
"`nn.BatchNorm1d`, but first flattens leading dimensions"
def forward(self, x):
if x.dim()==2: return super().forward(x)
*f,l = x.shape
        x = x.contiguous().view(-1,l)
        return super().forward(x).view(*f,l)
tst = BatchNorm1dFlat(15)
x = torch.randn(32, 64, 15)
y = tst(x)
mean = x.mean(dim=[0,1])
test_close(tst.running_mean, 0*0.9 + mean*0.1)
var = (x-mean).pow(2).mean(dim=[0,1])
test_close(tst.running_var, 1*0.9 + var*0.1, eps=1e-4)
test_close(y, (x-mean)/torch.sqrt(var+1e-5) * tst.weight + tst.bias, eps=1e-4)
class LinBnDrop(nn.Sequential):
"Module grouping `BatchNorm1d`, `Dropout` and `Linear` layers"
def __init__(self, n_in, n_out, bn=True, p=0., act=None, lin_first=False):
        layers = [BatchNorm(n_out if lin_first else n_in, ndim=1)] if bn else []
        if p != 0: layers.append(nn.Dropout(p))
        lin = [nn.Linear(n_in, n_out, bias=not bn)]
        if act is not None: lin.append(act)
        layers = lin+layers if lin_first else layers+lin
        super().__init__(*layers)
The `BatchNorm` layer is skipped if `bn=False`, as is the dropout if `p=0.`. Optionally, you can add an activation `act` at the end of the linear layer.
tst = LinBnDrop(10, 20)
mods = list(tst.children())
test_eq(len(mods), 2)
assert isinstance(mods[0], nn.BatchNorm1d)
assert isinstance(mods[1], nn.Linear)

tst = LinBnDrop(10, 20, p=0.1)
mods = list(tst.children())
test_eq(len(mods), 3)
assert isinstance(mods[0], nn.BatchNorm1d)
assert isinstance(mods[1], nn.Dropout)
assert isinstance(mods[2], nn.Linear)

tst = LinBnDrop(10, 20, act=nn.ReLU(), lin_first=True)
mods = list(tst.children())
test_eq(len(mods), 3)
assert isinstance(mods[0], nn.Linear)
assert isinstance(mods[1], nn.ReLU)
assert isinstance(mods[2], nn.BatchNorm1d)

tst = LinBnDrop(10, 20, bn=False)
mods = list(tst.children())
test_eq(len(mods), 1)
assert isinstance(mods[0], nn.Linear)
Initializations
def sigmoid(input, eps=1e-7):
"Same as `torch.sigmoid`, plus clamping to `(eps,1-eps)"
return input.sigmoid().clamp(eps,1-eps)
def sigmoid_(input, eps=1e-7):
"Same as `torch.sigmoid_`, plus clamping to `(eps,1-eps)"
return input.sigmoid_().clamp_(eps,1-eps)
from torch.nn.init import kaiming_uniform_,uniform_,xavier_uniform_,normal_
def vleaky_relu(input, inplace=True):
"`F.leaky_relu` with 0.3 slope"
return F.leaky_relu(input, negative_slope=0.3, inplace=inplace)
for o in F.relu,nn.ReLU,F.relu6,nn.ReLU6,F.leaky_relu,nn.LeakyReLU:
    o.__default_init__ = kaiming_uniform_

for o in F.sigmoid,nn.Sigmoid,F.tanh,nn.Tanh,sigmoid,sigmoid_:
    o.__default_init__ = xavier_uniform_
def init_default(m, func=nn.init.kaiming_normal_):
"Initialize `m` weights with `func` and set `bias` to 0."
if func and hasattr(m, 'weight'): func(m.weight)
with torch.no_grad(): nested_callable(m, 'bias.fill_')(0.)
return m
def init_linear(m, act_func=None, init='auto', bias_std=0.01):
if getattr(m,'bias',None) is not None and bias_std is not None:
if bias_std != 0: normal_(m.bias, 0, bias_std)
else: m.bias.data.zero_()
if init=='auto':
if act_func in (F.relu_,F.leaky_relu_): init = kaiming_uniform_
else: init = nested_callable(act_func, '__class__.__default_init__')
if init == noop: init = getcallable(act_func, '__default_init__')
if callable(init): init(m.weight)
Convolutions
def _conv_func(ndim=2, transpose=False):
"Return the proper conv `ndim` function, potentially `transposed`."
assert 1 <= ndim <=3
return getattr(nn, f'Conv{"Transpose" if transpose else ""}{ndim}d')
test_eq(_conv_func(ndim=1),torch.nn.modules.conv.Conv1d)
test_eq(_conv_func(ndim=2),torch.nn.modules.conv.Conv2d)
test_eq(_conv_func(ndim=3),torch.nn.modules.conv.Conv3d)
test_eq(_conv_func(ndim=1, transpose=True),torch.nn.modules.conv.ConvTranspose1d)
test_eq(_conv_func(ndim=2, transpose=True),torch.nn.modules.conv.ConvTranspose2d)
test_eq(_conv_func(ndim=3, transpose=True),torch.nn.modules.conv.ConvTranspose3d)
defaults.activation=nn.ReLU
class ConvLayer(nn.Sequential):
"Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and `norm_type` layers."
@delegates(nn.Conv2d)
    def __init__(self, ni, nf, ks=3, stride=1, padding=None, bias=None, ndim=2, norm_type=NormType.Batch, bn_1st=True,
                 act_cls=defaults.activation, transpose=False, init='auto', xtra=None, bias_std=0.01, **kwargs):
        if padding is None: padding = ((ks-1)//2 if not transpose else 0)
        bn = norm_type in (NormType.Batch, NormType.BatchZero)
        inn = norm_type in (NormType.Instance, NormType.InstanceZero)
        if bias is None: bias = not (bn or inn)
        conv_func = _conv_func(ndim, transpose=transpose)
        conv = conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride, padding=padding, **kwargs)
        act = None if act_cls is None else act_cls()
        init_linear(conv, act, init=init, bias_std=bias_std)
        if   norm_type==NormType.Weight:   conv = weight_norm(conv)
        elif norm_type==NormType.Spectral: conv = spectral_norm(conv)
        layers = [conv]
        act_bn = []
        if act is not None: act_bn.append(act)
        if bn: act_bn.append(BatchNorm(nf, norm_type=norm_type, ndim=ndim))
        if inn: act_bn.append(InstanceNorm(nf, norm_type=norm_type, ndim=ndim))
        if bn_1st: act_bn.reverse()
        layers += act_bn
        if xtra: layers.append(xtra)
        super().__init__(*layers)
The convolution uses `ks` (kernel size), `stride`, `padding` and `bias`. `padding` defaults to the appropriate value (`(ks-1)//2` if it's not a transposed conv), and `bias` defaults to `True` if `norm_type` is `Spectral` or `Weight`, `False` if it's `Batch` or `BatchZero`. Note that if you don't want any normalization, you should pass `norm_type=None`.
This defines a conv layer with `ndim` (1, 2 or 3) that will be a transposed conv if `transpose=True`. `act_cls` is the class of the activation function to use (instantiated inside). Pass `act=None` if you don't want an activation function. If you quickly want to change your default activation, you can change the value of `defaults.activation`.
`init` is used to initialize the weights (the bias is initialized to 0) and `xtra` is an optional layer to add at the end.
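As a small added illustration (not part of the original tests), `xtra` appends an extra module after the conv/norm/activation stack, for instance a dropout layer:
tst = ConvLayer(16, 32, xtra=nn.Dropout2d(0.1))
mods = list(tst.children())
test_eq(len(mods), 4)
assert isinstance(mods[-1], nn.Dropout2d)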
tst = ConvLayer(16, 32)
mods = list(tst.children())
test_eq(len(mods), 3)
test_eq(mods[1].weight, torch.ones(32))
test_eq(mods[0].padding, (1,1))

x = torch.randn(64, 16, 8, 8)#.cuda()
#Padding is chosen so that the shape stays the same with stride=1
test_eq(tst(x).shape, [64,32,8,8])

#Padding is chosen so that the shape is halved with stride=2
tst = ConvLayer(16, 32, stride=2)
test_eq(tst(x).shape, [64,32,4,4])

#But you can always pass your own padding if you want
tst = ConvLayer(16, 32, padding=0)
test_eq(tst(x).shape, [64,32,6,6])

#No bias by default for batch norm
assert mods[0].bias is None
#But this default behavior can be overridden with `bias=True`
tst = ConvLayer(16, 32, bias=True)
assert first(tst.children()).bias is not None
#For no norm, or spectral/weight norm, bias is True by default
for t in [None, NormType.Spectral, NormType.Weight]:
    tst = ConvLayer(16, 32, norm_type=t)
    assert first(tst.children()).bias is not None

#Various ndim/transpose
tst = ConvLayer(16, 32, ndim=3)
assert isinstance(list(tst.children())[0], nn.Conv3d)
tst = ConvLayer(16, 32, ndim=1, transpose=True)
assert isinstance(list(tst.children())[0], nn.ConvTranspose1d)

#No activation/leaky
tst = ConvLayer(16, 32, ndim=3, act_cls=None)
mods = list(tst.children())
test_eq(len(mods), 2)
tst = ConvLayer(16, 32, ndim=3, act_cls=partial(nn.LeakyReLU, negative_slope=0.1))
mods = list(tst.children())
test_eq(len(mods), 3)
assert isinstance(mods[2], nn.LeakyReLU)
# #export
# def linear(in_features, out_features, bias=True, act_cls=None, init='auto'):
#     "Linear layer followed by optional activation, with optional auto-init"
#     res = nn.Linear(in_features, out_features, bias=bias)
#     if act_cls: act_cls = act_cls()
#     init_linear(res, act_cls, init=init)
#     if act_cls: res = nn.Sequential(res, act_cls)
#     return res

# #export
# @delegates(ConvLayer)
# def conv1d(ni, nf, ks, stride=1, ndim=1, norm_type=None, **kwargs):
#     "Convolutional layer followed by optional activation, with optional auto-init"
#     return ConvLayer(ni, nf, ks, stride=stride, ndim=ndim, norm_type=norm_type, **kwargs)

# #export
# @delegates(ConvLayer)
# def conv2d(ni, nf, ks, stride=1, ndim=2, norm_type=None, **kwargs):
#     "Convolutional layer followed by optional activation, with optional auto-init"
#     return ConvLayer(ni, nf, ks, stride=stride, ndim=ndim, norm_type=norm_type, **kwargs)

# #export
# @delegates(ConvLayer)
# def conv3d(ni, nf, ks, stride=1, ndim=3, norm_type=None, **kwargs):
#     "Convolutional layer followed by optional activation, with optional auto-init"
#     return ConvLayer(ni, nf, ks, stride=stride, ndim=ndim, norm_type=norm_type, **kwargs)
def AdaptiveAvgPool(sz=1, ndim=2):
"nn.AdaptiveAvgPool layer for `ndim`"
assert 1 <= ndim <= 3
return getattr(nn, f"AdaptiveAvgPool{ndim}d")(sz)
def MaxPool(ks=2, stride=None, padding=0, ndim=2, ceil_mode=False):
"nn.MaxPool layer for `ndim`"
assert 1 <= ndim <= 3
return getattr(nn, f"MaxPool{ndim}d")(ks, stride=stride, padding=padding)
def AvgPool(ks=2, stride=None, padding=0, ndim=2, ceil_mode=False):
"nn.AvgPool layer for `ndim`"
assert 1 <= ndim <= 3
return getattr(nn, f"AvgPool{ndim}d")(ks, stride=stride, padding=padding, ceil_mode=ceil_mode)
Embeddings
def trunc_normal_(x, mean=0., std=1.):
"Truncated normal initialization (approximation)"
    # From https://discuss.pytorch.org/t/implementing-truncated-normal-initializer/4778/12
return x.normal_().fmod_(2).mul_(std).add_(mean)
class Embedding(nn.Embedding):
"Embedding layer with truncated normal initialization"
def __init__(self, ni, nf, std=0.01):
super().__init__(ni, nf)
        trunc_normal_(self.weight.data, std=std)
Truncated normal initialization bounds the distribution to avoid large values. For a given standard deviation `std`, the bounds are roughly `-2*std` and `2*std`.
std = 0.02
tst = Embedding(10, 30, std)
assert tst.weight.min() > -2*std
assert tst.weight.max() < 2*std
test_close(tst.weight.mean(), 0, 1e-2)
test_close(tst.weight.std(), std, 0.1)
Self attention
class SelfAttention(Module):
"Self attention layer for `n_channels`."
def __init__(self, n_channels):
self.query,self.key,self.value = [self._conv(n_channels, c) for c in (n_channels//8,n_channels//8,n_channels)]
self.gamma = nn.Parameter(tensor([0.]))
def _conv(self,n_in,n_out):
return ConvLayer(n_in, n_out, ks=1, ndim=1, norm_type=NormType.Spectral, act_cls=None, bias=False)
def forward(self, x):
        #Notation from the paper.
        size = x.size()
        x = x.view(*size[:2],-1)
        f,g,h = self.query(x),self.key(x),self.value(x)
        beta = F.softmax(torch.bmm(f.transpose(1,2), g), dim=1)
        o = self.gamma * torch.bmm(h, beta) + x
        return o.view(*size).contiguous()
Self-attention layer as introduced in Self-Attention Generative Adversarial Networks.
Initially, no change is done to the input. This is controlled by a trainable parameter named `gamma`, since we return `x + gamma * out`.
tst = SelfAttention(16)
x = torch.randn(32, 16, 8, 8)
test_eq(tst(x),x)
Then during training `gamma` will probably change, since it's a trainable parameter. Let's see what happens when it gets a nonzero value.
tst.gamma.data.fill_(1.)
y = tst(x)
test_eq(y.shape, [32,16,8,8])
The attention mechanism requires three matrix multiplications (here represented by 1x1 convs). The multiplications are done on the channel level (the second dimension in our tensor), and we flatten the feature map (which is 8x8 here). As in the paper, we note `f`, `g` and `h` the results of those multiplications.
q,k,v = tst.query[0].weight.data,tst.key[0].weight.data,tst.value[0].weight.data
test_eq([q.shape, k.shape, v.shape], [[2, 16, 1], [2, 16, 1], [16, 16, 1]])
f,g,h = map(lambda m: x.view(32, 16, 64).transpose(1,2) @ m.squeeze().t(), [q,k,v])
test_eq([f.shape, g.shape, h.shape], [[32,64,2], [32,64,2], [32,64,16]])
The key part of the attention layer is to compute attention weights for each of our locations in the feature map (here 8x8 = 64). Those are positive numbers that sum to 1 and tell the model to pay attention to this or that part of the picture. We take the product of `f` and the transpose of `g` (to get something of size bs x 64 x 64), then apply a softmax on the first dimension (to get positive numbers that sum to 1). The result can then be multiplied with `h` transposed to get an output of size bs x channels x 64, which we can then view as an output the same size as the original input.
The final result is then `x + gamma * out`, as we saw before.
beta = F.softmax(torch.bmm(f, g.transpose(1,2)), dim=1)
test_eq(beta.shape, [32, 64, 64])
out = torch.bmm(h.transpose(1,2), beta)
test_eq(out.shape, [32, 16, 64])
test_close(y, x + out.view(32, 16, 8, 8), eps=1e-4)
class PooledSelfAttention2d(Module):
"Pooled self attention layer for 2d."
def __init__(self, n_channels):
self.n_channels = n_channels
self.query,self.key,self.value = [self._conv(n_channels, c) for c in (n_channels//8,n_channels//8,n_channels//2)]
self.out = self._conv(n_channels//2, n_channels)
self.gamma = nn.Parameter(tensor([0.]))
def _conv(self,n_in,n_out):
return ConvLayer(n_in, n_out, ks=1, norm_type=NormType.Spectral, act_cls=None, bias=False)
def forward(self, x):
        n_ftrs = x.shape[2]*x.shape[3]
        f = self.query(x).view(-1, self.n_channels//8, n_ftrs)
        g = F.max_pool2d(self.key(x), [2,2]).view(-1, self.n_channels//8, n_ftrs//4)
        h = F.max_pool2d(self.value(x), [2,2]).view(-1, self.n_channels//2, n_ftrs//4)
        beta = F.softmax(torch.bmm(f.transpose(1, 2), g), -1)
        o = self.out(torch.bmm(h, beta.transpose(1,2)).view(-1, self.n_channels//2, x.shape[2], x.shape[3]))
        return self.gamma * o + x
Self-attention layer used in the Big GAN paper.
It uses the same attention as in `SelfAttention`, but adds a max pooling of stride 2 before computing the matrices `g` and `h`: the attention is applied to one of the 2x2 max-pooled windows rather than the whole feature map. There is also a final matrix product added on the output before returning `gamma * out + x`.
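As a quick added check (not in the original notebook), the layer preserves the input shape, and with `gamma` at its initial value of 0 it leaves the input unchanged:
tst = PooledSelfAttention2d(16)
x = torch.randn(32, 16, 8, 8)
test_eq(tst(x).shape, [32, 16, 8, 8])
test_eq(tst(x), x)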
def _conv1d_spect(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False):
"Create and initialize a `nn.Conv1d` layer with spectral normalization."
    conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias)
    nn.init.kaiming_normal_(conv.weight)
    if bias: conv.bias.data.zero_()
return spectral_norm(conv)
class SimpleSelfAttention(Module):
def __init__(self, n_in:int, ks=1, sym=False):
self.sym,self.n_in = sym,n_in
self.conv = _conv1d_spect(n_in, n_in, ks, padding=ks//2, bias=False)
self.gamma = nn.Parameter(tensor([0.]))
def forward(self,x):
if self.sym:
            c = self.conv.weight.view(self.n_in,self.n_in)
            c = (c + c.t())/2
            self.conv.weight = c.view(self.n_in,self.n_in,1)

        size = x.size()
        x = x.view(*size[:2],-1)

        convx = self.conv(x)
        xxT = torch.bmm(x,x.permute(0,2,1).contiguous())
        o = torch.bmm(xxT, convx)
        o = self.gamma * o + x
        return o.view(*size).contiguous()
PixelShuffle
PixelShuffle was introduced in this article to avoid checkerboard artifacts when upsampling images. If we want an output with `ch_out` filters, we use a convolution with `ch_out * (r**2)` filters, where `r` is the upsampling factor. Those filters are then reorganized so that each group of `r**2` channels forms an `r` by `r` patch of the output.
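Here is a minimal sketch of that recipe (added for illustration), using plain `nn.Conv2d` and `nn.PixelShuffle`, just to show the channel and size bookkeeping:
ch_in, ch_out, r = 16, 8, 2
conv = nn.Conv2d(ch_in, ch_out*(r**2), kernel_size=1)  # ch_out * r**2 filters
shuf = nn.PixelShuffle(r)
x = torch.randn(2, ch_in, 8, 8)
test_eq(shuf(conv(x)).shape, [2, ch_out, 16, 16])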
def icnr_init(x, scale=2, init=nn.init.kaiming_normal_):
"ICNR init of `x`, with `scale` and `init` function"
    ni,nf,h,w = x.shape
    ni2 = int(ni/(scale**2))
    k = init(x.new_zeros([ni2,nf,h,w])).transpose(0, 1)
    k = k.contiguous().view(ni2, nf, -1)
    k = k.repeat(1, 1, scale**2)
    return k.contiguous().view([nf,ni,h,w]).transpose(0, 1)
ICNR init was introduced in this article. It suggests initializing the convolution that will be used in PixelShuffle so that each of the `r**2` channels gets the same weight, so that the upsampled output is initially constant over each `r` by `r` window.
Note: this is done on the first dimension because PyTorch stores the weights of a convolutional layer in the format `ch_out x ch_in x ks x ks`.
tst = torch.randn(16*4, 32, 1, 1)
tst = icnr_init(tst)
for i in range(0,16*4,4):
    test_eq(tst[i],tst[i+1])
    test_eq(tst[i],tst[i+2])
    test_eq(tst[i],tst[i+3])
class PixelShuffle_ICNR(nn.Sequential):
"Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`."
def __init__(self, ni, nf=None, scale=2, blur=False, norm_type=NormType.Weight, act_cls=defaults.activation):
super().__init__()
        nf = ifnone(nf, ni)
        layers = [ConvLayer(ni, nf*(scale**2), ks=1, norm_type=norm_type, act_cls=act_cls, bias_std=0),
                  nn.PixelShuffle(scale)]
        if norm_type == NormType.Weight:
            layers[0][0].weight_v.data.copy_(icnr_init(layers[0][0].weight_v.data))
            layers[0][0].weight_g.data.copy_(((layers[0][0].weight_v.data**2).sum(dim=[1,2,3])**0.5)[:,None,None,None])
        else:
            layers[0][0].weight.data.copy_(icnr_init(layers[0][0].weight.data))
        if blur: layers += [nn.ReplicationPad2d((1,0,1,0)), nn.AvgPool2d(2, stride=1)]
        super().__init__(*layers)
The convolutional layer is initialized with `icnr_init` and passed `act_cls` and `norm_type` (in our experiments, the default of weight normalization seemed to work best for super-resolution problems).
The `blur` option comes from Super-Resolution using Convolutional Neural Networks without Any Checkerboard Artifacts, where the authors add a little bit of blur to completely get rid of checkerboard artifacts.
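For illustration (an added check, not from the original tests), `blur=True` appends the replication pad and average pooling pair after the shuffle; the output shape is unchanged:
psfl = PixelShuffle_ICNR(16, blur=True)
x = torch.randn(64, 16, 8, 8)
test_eq(psfl(x).shape, [64, 16, 16, 16])
assert isinstance(list(psfl.children())[-1], nn.AvgPool2d)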
psfl = PixelShuffle_ICNR(16)
x = torch.randn(64, 16, 8, 8)
y = psfl(x)
test_eq(y.shape, [64, 16, 16, 16])
#ICNR init makes every 2x2 window (stride 2) have the same elements
for i in range(0,16,2):
    for j in range(0,16,2):
        test_eq(y[:,:,i,j],y[:,:,i+1,j])
        test_eq(y[:,:,i,j],y[:,:,i,j+1])
        test_eq(y[:,:,i,j],y[:,:,i+1,j+1])

psfl = PixelShuffle_ICNR(16, norm_type=None)
x = torch.randn(64, 16, 8, 8)
y = psfl(x)
test_eq(y.shape, [64, 16, 16, 16])
#ICNR init makes every 2x2 window (stride 2) have the same elements
for i in range(0,16,2):
    for j in range(0,16,2):
        test_eq(y[:,:,i,j],y[:,:,i+1,j])
        test_eq(y[:,:,i,j],y[:,:,i,j+1])
        test_eq(y[:,:,i,j],y[:,:,i+1,j+1])

psfl = PixelShuffle_ICNR(16, norm_type=NormType.Spectral)
x = torch.randn(64, 16, 8, 8)
y = psfl(x)
test_eq(y.shape, [64, 16, 16, 16])
#ICNR init makes every 2x2 window (stride 2) have the same elements
for i in range(0,16,2):
    for j in range(0,16,2):
        test_eq(y[:,:,i,j],y[:,:,i+1,j])
        test_eq(y[:,:,i,j],y[:,:,i,j+1])
        test_eq(y[:,:,i,j],y[:,:,i+1,j+1])
Sequential extensions
def sequential(*args):
"Create an `nn.Sequential`, wrapping items with `Lambda` if needed"
if len(args) != 1 or not isinstance(args[0], OrderedDict):
        args = list(args)
        for i,o in enumerate(args):
            if not isinstance(o,nn.Module): args[i] = Lambda(o)
return nn.Sequential(*args)
class SequentialEx(Module):
"Like `nn.Sequential`, but with ModuleList semantics, and can access module input"
def __init__(self, *layers): self.layers = nn.ModuleList(layers)
def forward(self, x):
        res = x
        for l in self.layers:
            res.orig = x
            nres = l(res)
            # We have to remove res.orig to avoid hanging references and therefore memory leaks
            res.orig, nres.orig = None, None
            res = nres
        return res
def __getitem__(self,i): return self.layers[i]
def append(self,l): return self.layers.append(l)
def extend(self,l): return self.layers.extend(l)
def insert(self,i,l): return self.layers.insert(i,l)
This is useful to write layers that require remembering the input (like a resnet block) in a sequential way.
class MergeLayer(Module):
"Merge a shortcut with the result of the module by adding them or concatenating them if `dense=True`."
def __init__(self, dense:bool=False): self.dense=dense
def forward(self, x): return torch.cat([x,x.orig], dim=1) if self.dense else (x+x.orig)
res_block = SequentialEx(ConvLayer(16, 16), ConvLayer(16,16))
res_block.append(MergeLayer()) # just to test append - normally it would be in the initial params
x = torch.randn(32, 16, 8, 8)
y = res_block(x)
test_eq(y.shape, [32, 16, 8, 8])
test_eq(y, x + res_block[1](res_block[0](x)))

x = TensorBase(torch.randn(32, 16, 8, 8))
y = res_block(x)
test_is(y.orig, None)
Concat
Equivalent to keras.layers.Concatenate, it will concat the outputs of a ModuleList over a given dimension (the default is the filter dimension).
class Cat(nn.ModuleList):
"Concatenate layers outputs over a given dim"
def __init__(self, layers, dim=1):
self.dim=dim
super().__init__(layers)
def forward(self, x): return torch.cat([l(x) for l in self], dim=self.dim)
layers = [ConvLayer(2,4), ConvLayer(2,4), ConvLayer(2,4)]
x = torch.rand(1,2,8,8)
cat = Cat(layers)
test_eq(cat(x).shape, [1,12,8,8])
test_eq(cat(x), torch.cat([l(x) for l in layers], dim=1))
Ready-to-go models
class SimpleCNN(nn.Sequential):
"Create a simple CNN with `filters`."
def __init__(self, filters, kernel_szs=None, strides=None, bn=True):
        nl = len(filters)-1
        kernel_szs = ifnone(kernel_szs, [3]*nl)
        strides    = ifnone(strides   , [2]*nl)
        layers = [ConvLayer(filters[i], filters[i+1], kernel_szs[i], stride=strides[i],
                  norm_type=(NormType.Batch if bn and i<nl-1 else None)) for i in range(nl)]
        layers.append(PoolFlatten())
        super().__init__(*layers)
The model is a succession of convolutional layers from `(filters[0],filters[1])` to `(filters[n-2],filters[n-1])` (if `n` is the length of the `filters` list), followed by a `PoolFlatten`. `kernel_szs` and `strides` default to a list of 3s and a list of 2s. If `bn=True`, the convolutional layers are successions of conv-relu-batchnorm, otherwise conv-relu.
tst = SimpleCNN([8,16,32])
mods = list(tst.children())
test_eq(len(mods), 3)
test_eq([[m[0].in_channels, m[0].out_channels] for m in mods[:2]], [[8,16], [16,32]])
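As an added illustration of the conv-relu-batchnorm point: with the default `bn=True`, every `ConvLayer` except the last one contains a batchnorm, so the first block has 3 children (conv, bn, relu) while the last conv block has only 2:
test_eq([len(list(m.children())) for m in mods[:2]], [3, 2])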
Test kernel sizes
tst = SimpleCNN([8,16,32], kernel_szs=[1,3])
mods = list(tst.children())
test_eq([m[0].kernel_size for m in mods[:2]], [(1,1), (3,3)])
Test strides
tst = SimpleCNN([8,16,32], strides=[1,2])
mods = list(tst.children())
test_eq([m[0].stride for m in mods[:2]], [(1,1),(2,2)])
class ProdLayer(Module):
"Merge a shortcut with the result of the module by multiplying them."
def forward(self, x): return x * x.orig
inplace_relu = partial(nn.ReLU, inplace=True)
def SEModule(ch, reduction, act_cls=defaults.activation):
    nf = math.ceil(ch//reduction/8)*8
    return SequentialEx(nn.AdaptiveAvgPool2d(1),
                        ConvLayer(ch, nf, ks=1, norm_type=None, act_cls=act_cls),
                        ConvLayer(nf, ch, ks=1, norm_type=None, act_cls=nn.Sigmoid),
                        ProdLayer())
class ResBlock(Module):
"Resnet block from `ni` to `nh` with `stride`"
@delegates(ConvLayer.__init__)
    def __init__(self, expansion, ni, nf, stride=1, groups=1, reduction=None, nh1=None, nh2=None, dw=False, g2=1,
                 sa=False, sym=False, norm_type=NormType.Batch, act_cls=defaults.activation, ndim=2, ks=3,
                 pool=AvgPool, pool_first=True, **kwargs):
        norm2 = (NormType.BatchZero if norm_type==NormType.Batch else
                 NormType.InstanceZero if norm_type==NormType.Instance else norm_type)
        if nh2 is None: nh2 = nf
        if nh1 is None: nh1 = nh2
        nf,ni = nf*expansion,ni*expansion
        k0 = dict(norm_type=norm_type, act_cls=act_cls, ndim=ndim, **kwargs)
        k1 = dict(norm_type=norm2, act_cls=None, ndim=ndim, **kwargs)
        convpath  = [ConvLayer(ni,  nh2, ks, stride=stride, groups=ni if dw else groups, **k0),
                     ConvLayer(nh2,  nf, ks, groups=g2, **k1)
        ] if expansion == 1 else [
                     ConvLayer(ni,  nh1, 1, **k0),
                     ConvLayer(nh1, nh2, ks, stride=stride, groups=nh1 if dw else groups, **k0),
                     ConvLayer(nh2,  nf, 1, groups=g2, **k1)]
        if reduction: convpath.append(SEModule(nf, reduction=reduction, act_cls=act_cls))
        if sa: convpath.append(SimpleSelfAttention(nf,ks=1,sym=sym))
        self.convpath = nn.Sequential(*convpath)
        idpath = []
        if ni!=nf: idpath.append(ConvLayer(ni, nf, 1, act_cls=None, ndim=ndim, **kwargs))
        if stride!=1: idpath.insert((1,0)[pool_first], pool(stride, ndim=ndim, ceil_mode=True))
        self.idpath = nn.Sequential(*idpath)
        self.act = defaults.activation(inplace=True) if act_cls is defaults.activation else act_cls()

    def forward(self, x): return self.act(self.convpath(x) + self.idpath(x))
This is a resnet block (normal or bottleneck depending on `expansion`: 1 for the normal block and 4 for the traditional bottleneck) that implements the tweaks from Bag of Tricks for Image Classification with Convolutional Neural Networks. In particular, the last batchnorm layer (if that's the selected `norm_type`) is initialized with a weight (or gamma) of zero to facilitate the flow from the beginning to the end of the network. It also implements optional Squeeze-and-Excitation and grouped convs for ResNeXt and similar models (use `dw=True` for depthwise convs).
The `kwargs` are passed to `ConvLayer` along with `norm_type`.
def SEBlock(expansion, ni, nf, groups=1, reduction=16, stride=1, **kwargs):
return ResBlock(expansion, ni, nf, stride=stride, groups=groups, reduction=reduction, nh1=nf*2, nh2=nf*expansion, **kwargs)
def SEResNeXtBlock(expansion, ni, nf, groups=32, reduction=16, stride=1, base_width=4, **kwargs):
    w = math.floor(nf * (base_width / 64)) * groups
    return ResBlock(expansion, ni, nf, stride=stride, groups=groups, reduction=reduction, nh2=w, **kwargs)
def SeparableBlock(expansion, ni, nf, reduction=16, stride=1, base_width=4, **kwargs):
return ResBlock(expansion, ni, nf, stride=stride, reduction=reduction, nh2=nf*2, dw=True, **kwargs)
Time Distributed Layer
Equivalent to Keras' `TimeDistributed` layer; it enables computing a PyTorch `Module` over an axis.
def _stack_tups(tuples, stack_dim=1):
"Stack tuple of tensors along `stack_dim`"
return tuple(torch.stack([t[i] for t in tuples], dim=stack_dim) for i in range_of(tuples[0]))
class TimeDistributed(Module):
"Applies `module` over `tdim` identically for each step, use `low_mem` to compute one at a time."
def __init__(self, module, low_mem=False, tdim=1):
store_attr()
def forward(self, *tensors, **kwargs):
"input x with shape:(bs,seq_len,channels,width,height)"
if self.low_mem or self.tdim!=1:
return self.low_mem_forward(*tensors, **kwargs)
else:
            #only support tdim=1
            inp_shape = tensors[0].shape
            bs, seq_len = inp_shape[0], inp_shape[1]
            out = self.module(*[x.view(bs*seq_len, *x.shape[2:]) for x in tensors], **kwargs)
        return self.format_output(out, bs, seq_len)

    def low_mem_forward(self, *tensors, **kwargs):
        "input x with shape:(bs,seq_len,channels,width,height)"
        seq_len = tensors[0].shape[self.tdim]
        args_split = [torch.unbind(x, dim=self.tdim) for x in tensors]
        out = []
        for i in range(seq_len):
            out.append(self.module(*[args[i] for args in args_split], **kwargs))
        if isinstance(out[0], tuple):
            return _stack_tups(out, stack_dim=self.tdim)
        return torch.stack(out, dim=self.tdim)
def format_output(self, out, bs, seq_len):
"unstack from batchsize outputs"
if isinstance(out, tuple):
return tuple(out_i.view(bs, seq_len, *out_i.shape[1:]) for out_i in out)
return out.view(bs, seq_len,*out.shape[1:])
def __repr__(self):
return f'TimeDistributed({self.module})'
bs, seq_len = 2, 5
x, y = torch.rand(bs,seq_len,3,2,2), torch.rand(bs,seq_len,3,2,2)

tconv = TimeDistributed(nn.Conv2d(3,4,1))
test_eq(tconv(x).shape, (2,5,4,2,2))
tconv.low_mem=True
test_eq(tconv(x).shape, (2,5,4,2,2))
class Mod(Module):
def __init__(self):
self.conv = nn.Conv2d(3,4,1)
def forward(self, x, y):
return self.conv(x) + self.conv(y)
tmod = TimeDistributed(Mod())
out = tmod(x,y)
test_eq(out.shape, (2,5,4,2,2))
tmod.low_mem=True
out_low_mem = tmod(x,y)
test_eq(out_low_mem.shape, (2,5,4,2,2))
test_eq(out, out_low_mem)
class Mod2(Module):
def __init__(self):
self.conv = nn.Conv2d(3,4,1)
def forward(self, x, y):
return self.conv(x), self.conv(y)
tmod2 = TimeDistributed(Mod2())
out = tmod2(x,y)
test_eq(len(out), 2)
test_eq(out[0].shape, (2,5,4,2,2))
tmod2.low_mem=True
out_low_mem = tmod2(x,y)
test_eq(out_low_mem[0].shape, (2,5,4,2,2))
test_eq(out, out_low_mem)
show_doc(TimeDistributed)
This module is equivalent to Keras' TimeDistributed layer. The wrapper allows applying a layer to every temporal slice of an input. By default, the time axis (`tdim`) is assumed to be the first one (the one right after the batch size). A typical usage is encoding a sequence of images with an image encoder.
The `forward` function of `TimeDistributed` supports `*args` and `**kwargs`, but only `args` will be split and passed to the underlying module independently for each timestep; `kwargs` will be passed as they are. This is useful when the module takes multiple arguments as input: put all the tensors that need to be split over time in `args`, and the other arguments that don't need splitting in `kwargs`.
This module is heavy on memory, as it tries to pass multiple timesteps at the same time on the batch dimension. If you get out-of-memory errors, try first reducing your batch size by the number of timesteps.
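For illustration (a hypothetical `_ScaledConv` module added by the editor, not from the notebook), only the positional tensors are split over time, while keyword arguments such as a scalar `scale` are forwarded unchanged to every call:
class _ScaledConv(Module):
    def __init__(self): self.conv = nn.Conv2d(3,4,1)
    def forward(self, x, scale=1.): return self.conv(x) * scale

tmod_s = TimeDistributed(_ScaledConv())
xb = torch.rand(2, 5, 3, 2, 2)
test_eq(tmod_s(xb, scale=2.).shape, (2,5,4,2,2))
test_close(tmod_s(xb, scale=2.), tmod_s(xb) * 2.)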
from fastai.vision.all import *
encoder = create_body(resnet18())
A resnet18 will encode a feature map of 512 channels. Height and width will be divided by 32.
time_resnet = TimeDistributed(encoder)
A synthetic batch of 2 image sequences, each of length 5: (bs, seq_len, channels, width, height).
image_sequence = torch.rand(2, 5, 3, 64, 64)
time_resnet(image_sequence).shape
torch.Size([2, 5, 512, 2, 2])
This way, one can encode a sequence of images in feature space. There is also a `low_mem_forward` that will pass the images one at a time to reduce GPU memory consumption.
time_resnet.low_mem_forward(image_sequence).shape
torch.Size([2, 5, 512, 2, 2])
Swish and Mish
from torch.jit import script
@script
def _swish_jit_fwd(x): return x.mul(torch.sigmoid(x))
@script
def _swish_jit_bwd(x, grad_output):
    x_sigmoid = torch.sigmoid(x)
    return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))
class _SwishJitAutoFn(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
        ctx.save_for_backward(x)
        return _swish_jit_fwd(x)
@staticmethod
def backward(ctx, grad_output):
        x = ctx.saved_variables[0]
        return _swish_jit_bwd(x, grad_output)

def swish(x, inplace=False): return F.silu(x, inplace=inplace)
class SwishJit(Module):
def forward(self, x): return _SwishJitAutoFn.apply(x)
@script
def _mish_jit_fwd(x): return x.mul(torch.tanh(F.softplus(x)))
@script
def _mish_jit_bwd(x, grad_output):
    x_sigmoid = torch.sigmoid(x)
    x_tanh_sp = F.softplus(x).tanh()
    return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))
class MishJitAutoFn(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
        ctx.save_for_backward(x)
        return _mish_jit_fwd(x)
@staticmethod
def backward(ctx, grad_output):
        x = ctx.saved_variables[0]
        return _mish_jit_bwd(x, grad_output)
def mish(x, inplace=False): return F.mish(x, inplace=inplace)
class MishJit(Module):
def forward(self, x): return MishJitAutoFn.apply(x)
Mish = nn.Mish
Swish = nn.SiLU
for o in swish,Swish,SwishJit,mish,Mish,MishJit: o.__default_init__ = kaiming_uniform_
Helper functions for submodules
It's easy to get the list of all parameters of a given model. For when you want all the submodules (like linear or conv layers) without forgetting lone parameters, the following class wraps those in fake modules.
class ParameterModule(Module):
"Register a lone parameter `p` in a module."
def __init__(self, p): self.val = p
def forward(self, x): return x
def children_and_parameters(m):
"Return the children of `m` and its direct parameters not registered in modules."
    children = list(m.children())
    children_p = sum([[id(p) for p in c.parameters()] for c in m.children()],[])
    for p in m.parameters():
        if id(p) not in children_p: children.append(ParameterModule(p))
return children
class TstModule(Module):
def __init__(self): self.a,self.lin = nn.Parameter(torch.randn(1)),nn.Linear(5,10)
tst = TstModule()
children = children_and_parameters(tst)
test_eq(len(children), 2)
test_eq(children[0], tst.lin)
assert isinstance(children[1], ParameterModule)
test_eq(children[1].val, tst.a)
def has_children(m):
try: next(m.children())
except StopIteration: return False
return True
class A(Module): pass
assert not has_children(A())
assert has_children(TstModule())
def flatten_model(m):
"Return the list of all submodules and parameters of `m`"
return sum(map(flatten_model,children_and_parameters(m)),[]) if has_children(m) else [m]
tst = nn.Sequential(TstModule(), TstModule())
children = flatten_model(tst)
test_eq(len(children), 4)
assert isinstance(children[1], ParameterModule)
assert isinstance(children[3], ParameterModule)
class NoneReduce():
"A context manager to evaluate `loss_func` with none reduce."
def __init__(self, loss_func): self.loss_func,self.old_red = loss_func,None
def __enter__(self):
if hasattr(self.loss_func, 'reduction'):
self.old_red = self.loss_func.reduction
self.loss_func.reduction = 'none'
return self.loss_func
else: return partial(self.loss_func, reduction='none')
def __exit__(self, type, value, traceback):
if self.old_red is not None: self.loss_func.reduction = self.old_red
x,y = torch.randn(5),torch.randn(5)
loss_fn = nn.MSELoss()
with NoneReduce(loss_fn) as loss_func:
    loss = loss_func(x,y)
test_eq(loss.shape, [5])
test_eq(loss_fn.reduction, 'mean')

loss_fn = F.mse_loss
with NoneReduce(loss_fn) as loss_func:
    loss = loss_func(x,y)
test_eq(loss.shape, [5])
test_eq(loss_fn, F.mse_loss)
def in_channels(m):
"Return the shape of the first weight layer in `m`."
try: return next(l.weight.shape[1] for l in flatten_model(m) if nested_attr(l,'weight.ndim',-1)==4)
except StopIteration as e: e.args = ["No weight layer"]; raise
test_eq(in_channels(nn.Sequential(nn.Conv2d(5,4,3), nn.Conv2d(4,3,3))), 5)
test_eq(in_channels(nn.Sequential(nn.AvgPool2d(4), nn.Conv2d(4,3,3))), 4)
test_eq(in_channels(nn.Sequential(BatchNorm(4), nn.Conv2d(4,3,3))), 4)
test_eq(in_channels(nn.Sequential(InstanceNorm(4), nn.Conv2d(4,3,3))), 4)
test_eq(in_channels(nn.Sequential(InstanceNorm(4, affine=False), nn.Conv2d(4,3,3))), 4)
test_fail(lambda : in_channels(nn.Sequential(nn.AvgPool2d(4))))
Export -
from nbdev import *
nbdev_export()
Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 01a_losses.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 10b_tutorial.albumentations.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_callback.core.ipynb.
Converted 13a_learner.ipynb.
Converted 13b_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 18a_callback.training.ipynb.
Converted 18b_callback.preds.ipynb.
Converted 19_callback.mixup.ipynb.
Converted 20_interpret.ipynb.
Converted 20a_distributed.ipynb.
Converted 20b_tutorial.distributed.ipynb.
Converted 21_vision.learner.ipynb.
Converted 22_tutorial.imagenette.ipynb.
Converted 23_tutorial.vision.ipynb.
Converted 24_tutorial.image_sequence.ipynb.
Converted 24_tutorial.siamese.ipynb.
Converted 24_vision.gan.ipynb.
Converted 30_text.core.ipynb.
Converted 31_text.data.ipynb.
Converted 32_text.models.awdlstm.ipynb.
Converted 33_text.models.core.ipynb.
Converted 34_callback.rnn.ipynb.
Converted 35_tutorial.wikitext.ipynb.
Converted 37_text.learner.ipynb.
Converted 38_tutorial.text.ipynb.
Converted 39_tutorial.transformers.ipynb.
Converted 40_tabular.core.ipynb.
Converted 41_tabular.data.ipynb.
Converted 42_tabular.model.ipynb.
Converted 43_tabular.learner.ipynb.
Converted 44_tutorial.tabular.ipynb.
Converted 45_collab.ipynb.
Converted 46_tutorial.collab.ipynb.
Converted 50_tutorial.datablock.ipynb.
Converted 60_medical.imaging.ipynb.
Converted 61_tutorial.medical_imaging.ipynb.
Converted 65_medical.text.ipynb.
Converted 70_callback.wandb.ipynb.
Converted 70a_callback.tensorboard.ipynb.
Converted 70b_callback.neptune.ipynb.
Converted 70c_callback.captum.ipynb.
Converted 70d_callback.comet.ipynb.
Converted 74_huggingface.ipynb.
Converted 97_test_utils.ipynb.
Converted 99_pytorch_doc.ipynb.
Converted dev-setup.ipynb.
Converted app_examples.ipynb.
Converted camvid.ipynb.
Converted distributed_app_examples.ipynb.
Converted migrating_catalyst.ipynb.
Converted migrating_ignite.ipynb.
Converted migrating_lightning.ipynb.
Converted migrating_pytorch.ipynb.
Converted migrating_pytorch_verbose.ipynb.
Converted ulmfit.ipynb.
Converted index.ipynb.
Converted quick_start.ipynb.
Converted tutorial.ipynb.