! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
视觉增强
#| default_exp vision.augment
#| default_cls_lvl 3
在计算机视觉中应用数据增强的变换
from __future__ import annotations
from fastai.data.all import *
from fastai.vision.core import *
from fastai.vision.data import *
在当今快节奏的世界中,时间管理已成为一项至关重要的技能。无论是在职场还是个人生活中,有效地管理时间都能显著提升生产力和生活质量。首先,制定明确的目标是时间管理的基础。这些目标应具体、可衡量、可实现、相关且有时限,即SMART原则。其次,优先级排序是关键。使用艾森豪威尔矩阵等工具,将任务分为重要且紧急、重要但不紧急、紧急但不重要以及既不紧急也不重要四类,有助于集中精力处理真正重要的事务。此外,避免拖延也是时间管理的重要一环。通过设定小目标、分解任务以及使用番茄工作法等技巧,可以有效克服拖延症。最后,定期回顾和调整时间管理策略,确保其适应不断变化的需求和环境,是持续提升时间管理能力的关键。
from nbdev.showdoc import *
from torch import stack, zeros_like as t0, ones_like as t1
from torch.distributions.bernoulli import Bernoulli
# Reference image used by the visual examples below, resized to 600x400.
img = PILImage(PILImage.create(TEST_IMAGE).resize((600,400)))
RandTransform-
class RandTransform(DisplayedTransform):
    "A transform that before_call its state at each `__call__`"
    do,nm,supports,split_idx = True,None,[],0
    def __init__(self,
        p:float=1., # Probability of applying Transform
        nm:str=None,
        before_call:callable=None, # Optional batchwise preprocessing function
        **kwargs
    ):
        store_attr('p')
        super().__init__(**kwargs)
        # A user-supplied callable replaces the `before_call` method on this instance
        self.before_call = ifnone(before_call,self.before_call)

    def before_call(self,
        b,
        split_idx:int, # Index of the train/valid dataset
    ):
        "This function can be overridden. Set `self.do` based on `self.p`"
        # `p==1.` short-circuits so no random number is drawn in that case
        self.do = self.p==1. or random.random() < self.p

    def __call__(self,
        b,
        split_idx:int=None, # Index of the train/valid dataset
        **kwargs
    ):
        # Refresh the random state first, then apply the transform only if `self.do` is set
        self.before_call(b, split_idx=split_idx)
        return super().__call__(b, split_idx=split_idx, **kwargs) if self.do else b
对于所有的 Transform
,您可以在初始化时传递 encodes
和 decodes
,或者通过子类化并实现它们。您可以对每次 __call__
调用时被调用的 before_call
方法做同样的操作。请注意,为了确保输入和目标的一致状态,RandTransform
必须在元组级别应用。
默认情况下,before_call
的行为是以概率 p
执行变换(如果是子类化并想要调节该行为,则会检查属性 self.do
,如果存在的话,用于决定是否执行变换)。
默认情况下,RandTransform
仅应用于训练集,因此如果您直接调用它而不是通过 Datasets
,则必须传递 split_idx=0
。通过将变换的属性 split_idx
设置为 None
,可以更改这种行为。
RandTransform.before_call
<function __main__.RandTransform.before_call(self, b, split_idx: 'int')>
show_doc(RandTransform.before_call)
RandTransform.before_call
RandTransform.before_call (b, split_idx:int)
This function can be overridden. Set self.do
based on self.p
Type | Details | |
---|---|---|
b | ||
split_idx | int | Index of the train/valid dataset |
def _add1(x): return x+1
= RandTransform(enc=_add1, p=0.5)
dumb_tfm = 2,False,False
start,d1,d2 for _ in range(40):
= dumb_tfm(start, split_idx=0)
t if dumb_tfm.do: test_eq(t, start+1); d1=True
else: test_eq(t, start) ; d2=True
assert d1 and d2
dumb_tfm
_add1 -- {'p': 0.5}:
encodes: (object,object) -> _add1decodes:
项目转换
def _neg_axis(x, axis):
= -x[...,axis]
x[...,axis] return x
# Tensor types that the item-level transforms below dispatch on
TensorTypes = (TensorImage,TensorMask,TensorPoint,TensorBBox)
@patch
def flip_lr(x:Image.Image):
    "Flip a PIL image horizontally"
    # Pillow 9.1+ exposes the constant on `Image.Transpose` (the module-level name raised a
    # DeprecationWarning in the rendered output of this notebook); fall back to the module-level constant on
    # older Pillow versions that have no `Transpose` enum.
    return x.transpose(getattr(Image, 'Transpose', Image).FLIP_LEFT_RIGHT)
@patch
def flip_lr(x:TensorImageBase):
    "Flip an image tensor along its last dimension"
    return x.flip(-1)

@patch
def flip_lr(x:TensorPoint):
    "Flip points by negating coordinate 0 on a copy of `x`"
    flipped = _neg_axis(x.clone(), 0)
    return TensorPoint(flipped)

@patch
def flip_lr(x:TensorBBox):
    "Flip boxes by flipping their corners as points, then regrouping them four per box"
    corners = TensorPoint(x.view(-1,2))
    return TensorBBox(corners.flip_lr().view(-1,4))
= subplots(1,2)
_,axs =axs[0], title='original')
show_image(img, ctx=axs[1], title='flipped'); show_image(img.flip_lr(), ctx
/var/folders/fk/s29n0g1x4qnbp5h0xvh8dsnm0000gn/T/ipykernel_5338/3686934465.py:3: DeprecationWarning: FLIP_LEFT_RIGHT is deprecated and will be removed in Pillow 10 (2023-07-01). Use Transpose.FLIP_LEFT_RIGHT instead.
def flip_lr(x:Image.Image): return x.transpose(Image.FLIP_LEFT_RIGHT)
def _pnt2tensor(pnts, sz):
= torch.zeros(*sz)
t for p in pnts: t[p[1],p[0]] = 1.
return t
= _pnt2tensor([[1,0], [2,1]], (3,3))
t = PILImage.create(t)
x = x.flip_lr()
y 1,0], [0,1]], (3,3)))
test_eq(tensor(array(y)), _pnt2tensor([[
= TensorPoint(tensor([[1.,0], [2,1]]) -1)
pnts 1.,0], [0,1]]) -1)
test_eq(pnts.flip_lr(), tensor([[
= TensorBBox((tensor([[1.,0., 2.,1]]) -1))
bbox 1.,0., 0.,1]]) -1) test_eq(bbox.flip_lr(), tensor([[
/var/folders/fk/s29n0g1x4qnbp5h0xvh8dsnm0000gn/T/ipykernel_5338/3686934465.py:3: DeprecationWarning: FLIP_LEFT_RIGHT is deprecated and will be removed in Pillow 10 (2023-07-01). Use Transpose.FLIP_LEFT_RIGHT instead.
def flip_lr(x:Image.Image): return x.transpose(Image.FLIP_LEFT_RIGHT)
class FlipItem(RandTransform):
    "Randomly flip with probability `p`"
    def __init__(self, p:float=0.5): super().__init__(p=p)
    # Delegates to the type-dispatched `flip_lr` patched onto each supported type
    def encodes(self, x:(Image.Image,*TensorTypes)): return x.flip_lr()
调用 @patch
修饰的 flip_lr
行为用于 Image
、TensorImage
、TensorPoint
和 TensorBBox
= FlipItem(p=1.)
tflip =0), tensor([[1.,0., 0.,1]]) -1) test_eq(tflip(bbox,split_idx
@patch
def dihedral(x:PILImage,
    k:int, # Dihedral transformation to apply
):
    "Apply dihedral transform `k` to a `PILImage`; `k==0` returns `x` unchanged"
    # For k>0 the code passes `k-1` to `transpose` as the transposition method
    return x if k==0 else x.transpose(k-1)
@patch
def dihedral(x:TensorImage,
    k:int, # Dihedral transformation to apply
):
    "Apply dihedral transform `k` to a `TensorImage` as a combination of two flips and a transpose"
    if k in [1,3,4,7]: x = x.flip(-1)
    if k in [2,4,5,7]: x = x.flip(-2)
    if k in [3,5,6,7]: x = x.transpose(-1,-2)
    return x
@patch
def dihedral(x:TensorPoint,
    k:int, # Dihedral transformation to apply
):
    "Apply dihedral transform `k` to a `TensorPoint`"
    # NOTE: `_neg_axis` writes into `x` in place
    if k in [1,3,4,7]: x = _neg_axis(x, 0)
    if k in [2,4,5,7]: x = _neg_axis(x, 1)
    if k in [3,5,6,7]: x = x.flip(1)
    return x
@patch
def dihedral(x:TensorBBox,
    k:int, # Dihedral transformation to apply
):
    "Apply dihedral transform `k` to a `TensorBBox`"
    # Transform the corners as points, then rebuild each box from the min/max of its two corners
    pnts = TensorPoint(x.view(-1,2)).dihedral(k).view(-1,2,2)
    tl,br = pnts.min(dim=1)[0],pnts.max(dim=1)[0]
    return TensorBBox(torch.cat([tl, br], dim=1), img_size=x.img_size)
class DihedralItem(RandTransform):
    "Randomly flip with probability `p`"
    def before_call(self, b, split_idx):
        super().before_call(b, split_idx)
        # Draw one of the 8 dihedral transforms (0 is the identity) uniformly at each call
        self.k = random.randint(0,7)

    def encodes(self, x:(Image.Image,*TensorTypes)): return x.dihedral(self.k)
调用 @patch
修饰的 PILImage
、TensorImage
、TensorPoint
和 TensorBBox
的二面体变换行为。
默认情况下,应用变换时这 8 种二面体变换(包括无操作)的被选中概率相同。您可以通过传递自定义的 draw
函数来定制此行为。要强制进行特定的翻转,您也可以传递一个介于 0 和 7 之间的整数。
= subplots(2, 4)
_,axs for ax in axs.flatten():
=1.)(img, split_idx=0), ctx=ax) show_image(DihedralItem(p
= _pnt2tensor([[1,0], [2,1]], (3,3))
t = PILImage.create(t)
x for i in range(8):
= x.dihedral(i)
y = tensor(array(y))
res = TensorPoint(tensor([[1.,0.], [2,1]]) -1)
pnts = pnts.dihedral(i), res.nonzero().flip(1).float()-1
a,b assert equals(a,b) or equals(a,b.flip(0))
使用裁剪、填充或拉伸进行调整大小
from torchvision.transforms.functional import pad as tvpad
# Each attribute maps a capitalised name to its lower-case string value, e.g. PadMode.Zeros == 'zeros'
mk_class('PadMode', **{o:o.lower() for o in ['Zeros', 'Border', 'Reflection']},
         doc="All possible padding mode as attributes to get tab-completion and typo-proofing")
_all_ = ['PadMode']
show_doc(PadMode, title_level=3)
PadMode
PadMode (*args, **kwargs)
All possible padding mode as attributes to get tab-completion and typo-proofing
# Maps fastai `PadMode` values to the `padding_mode` names passed to torchvision's `pad`
_pad_modes = {'zeros': 'constant', 'border': 'edge', 'reflection': 'reflect'}

@patch
def _do_crop_pad(x:Image.Image, sz, tl, orig_sz,
                 pad_mode=PadMode.Zeros, resize_mode=BILINEAR, resize_to=None):
    "Crop and/or pad a PIL image to `sz` starting at top-left `tl`, then optionally resize to `resize_to`"
    if any(tl.ge(0)) or any(tl.add(sz).le(orig_sz)):
        # At least one dim is inside the image, so needs to be cropped
        c = tl.max(0)
        x = x.crop((*c, *tl.add(sz).min(orig_sz)))
    if any(tl.lt(0)) or any(tl.add(sz).ge(orig_sz)):
        # At least one dim is outside the image, so needs to be padded
        p = (-tl).max(0)
        f = (sz-orig_sz).add(tl).max(0)
        x = tvpad(x, (*p, *f), padding_mode=_pad_modes[pad_mode])
    if resize_to is not None: x = x.resize(resize_to, resize_mode)
    return x
@patch
def _do_crop_pad(x:TensorPoint, sz, tl, orig_sz, pad_mode=PadMode.Zeros, resize_to=None, **kwargs):
    "Re-express points of an `orig_sz` image in the coordinates of the `sz` crop/pad starting at `tl`"
    # Disabled check kept from the original source:
    #assert pad_mode==PadMode.Zeros,"Only zero padding is supported for `TensorPoint` and `TensorBBox`"
    orig_sz,sz,tl = map(FloatTensor, (orig_sz,sz,tl))
    return TensorPoint((x+1)*orig_sz/sz - tl*2/sz - 1, sz=sz if resize_to is None else resize_to)
@patch
def _do_crop_pad(x:TensorBBox, sz, tl, orig_sz, pad_mode=PadMode.Zeros, resize_to=None, **kwargs):
    "Crop/pad bounding boxes by treating their corners as points"
    bbox = TensorPoint._do_crop_pad(x.view(-1,2), sz, tl, orig_sz, pad_mode, resize_to).view(-1,4)
    return TensorBBox(bbox, img_size=x.img_size)
@patch
def crop_pad(x:TensorBBox|TensorPoint|Image.Image,
    sz:int|tuple, # Crop/pad size of input, duplicated if one value is specified
    tl:tuple=None, # Optional top-left coordinate of the crop/pad, if `None` center crop
    orig_sz:tuple=None, # Original size of input
    pad_mode:PadMode=PadMode.Zeros, # Fastai padding mode
    resize_mode=BILINEAR, # Pillow `Image` resize mode
    resize_to:tuple=None # Optional post crop/pad resize of input
):
    "Crop or pad `x` to `sz`, dispatching to the type-specific `_do_crop_pad`"
    if isinstance(sz,int): sz = (sz,sz)
    orig_sz = fastuple(_get_sz(x) if orig_sz is None else orig_sz)
    # When `tl` is not given, center the crop/pad window on the input
    sz,tl = fastuple(sz),fastuple(((_get_sz(x)-sz)//2) if tl is None else tl)
    return x._do_crop_pad(sz, tl, orig_sz=orig_sz, pad_mode=pad_mode, resize_mode=resize_mode, resize_to=resize_to)
def _process_sz(size):
    "Duplicate an int `size` into a pair, then return it with its two entries swapped"
    if isinstance(size,int): size=(size,size)
    return fastuple(size[1],size[0])
def _get_sz(x):
    "Size of `x` (or of its first element when `x` is a tuple) as a `fastuple`"
    if isinstance(x, tuple): x = x[0]
    # Non-tensor inputs are expected to expose `.size` (e.g. PIL images)
    if not isinstance(x, Tensor): return fastuple(x.size)
    # Tensors: prefer `img_size`, then `sz`, else the last two dims in reversed order
    return fastuple(getattr(x, 'img_size', getattr(x, 'sz', (x.shape[-1], x.shape[-2]))))
@delegates()
class CropPad(DisplayedTransform):
    "Center crop or pad an image to `size`"
    order = 0
    def __init__(self,
        size:int|tuple, # Size to crop or pad to, duplicated if one value is specified
        pad_mode:PadMode=PadMode.Zeros, # A `PadMode`
        **kwargs
    ):
        size = _process_sz(size)
        store_attr()
        super().__init__(**kwargs)

    def encodes(self, x:Image.Image|TensorBBox|TensorPoint):
        orig_sz = _get_sz(x)
        # Center the window; `tl` is negative along a dimension that needs padding
        tl = (orig_sz-self.size)//2
        return x.crop_pad(self.size, tl, orig_sz=orig_sz, pad_mode=self.pad_mode)
调用 @patch
的 crop_pad
行为适用于 Image
、TensorImage
、TensorPoint
和 TensorBBox
= plt.subplots(1,3,figsize=(12,4))
_,axs for ax,sz in zip(axs.flatten(), [300, 500, 700]):
=ax, title=f'Size {sz}');
show_image(img.crop_pad(sz), ctxprint(img.crop_pad(sz).shape)
(300, 300)
(500, 500)
(700, 700)
= plt.subplots(1,3,figsize=(12,4))
_,axs for ax,mode in zip(axs.flatten(), [PadMode.Zeros, PadMode.Border, PadMode.Reflection]):
600,700), pad_mode=mode), ctx=ax, title=mode); show_image(img.crop_pad((
= torch.empty(16,16).uniform_(0,1)
ta = torch.empty(20,20).uniform_(0,1)
tb = PILImage.create(ta)
x1 = PILImage.create(tb)
x2 = CropPad(10)
crop = crop((x1,x2))
y1,y2 10,10))
test_eq(y1.size, (10,10))
test_eq(y2.size, (3:13,3:13])
test_eq(tensor(array(y1)), ta[5:15,5:15]) test_eq(tensor(array(y2)), tb[
= torch.empty(20,16).uniform_(0,1)
t = PILImage.create(t)
x = CropPad(10)
crop = crop(x)
y 10,10))
test_eq(y.size, (5:15,3:13])
test_eq(tensor(array(y)), t[
= TensorPoint(torch.tensor([[-1,-1], [-0.5,-0.5], [0.,0.]]), img_size=(16,20))
pts = crop((x,pts))
y,p1 -1.6, -2], [-0.8,-1], [0,0]])) test_eq(p1, torch.tensor([[
#填充测试
= torch.empty(10,8).uniform_(0,1)
t = PILImage.create(t)
x = CropPad(12)
crop = crop(x)
y 12,12))
test_eq(y.size, (1:11,2:10], t)
test_eq(tensor(array(y))[
= TensorPoint(torch.tensor([[-1,-1], [-0.5,-0.5], [0.,0.]]), img_size=(8,10))
pts = crop((x,pts))
y,p1 -2/3, -5/6], [-1/3,-5/12], [0,0]])) test_close(p1, torch.tensor([[
# 裁剪和填充测试
= torch.empty(10,10).uniform_(0,1)
t = PILImage.create(t)
x = x.crop_pad((5, 5), (-2, 2))
y1 = x.crop_pad((5, 5), (8, 2))
y2 = x.crop_pad((5, 5), (-1, -1))
y3 5, 5))
test_eq(y1.shape, (5, 5))
test_eq(y2.shape, (5, 5))
test_eq(y3.shape, (2:], t[2:7, 0:3])
test_eq(tensor(array(y1))[:, 2], t[2:7, 8:])
test_eq(tensor(array(y2))[:, :1:, 1:], t[:4, :4]) test_eq(tensor(array(y3))[
随机裁剪 -
@delegates()
class RandomCrop(RandTransform):
    "Randomly crop an image to `size`"
    split_idx,order = None,1
    def __init__(self,
        size:int|tuple, # Size to crop to, duplicated if one value is specified
        **kwargs
    ):
        size = _process_sz(size)
        store_attr()
        super().__init__(**kwargs)

    def before_call(self,
        b,
        split_idx:int # Index of the train/valid dataset
    ):
        "Randomly positioning crop if train dataset else center crop"
        self.orig_sz = _get_sz(b)
        if split_idx: self.tl = (self.orig_sz-self.size)//2
        else:
            wd = self.orig_sz[0] - self.size[0]
            hd = self.orig_sz[1] - self.size[1]
            # When the target is larger than the input the difference is negative;
            # draw the offset from [diff, -1] in that case, otherwise from [0, diff]
            w_rand = (wd, -1) if wd < 0 else (0, wd)
            h_rand = (hd, -1) if hd < 0 else (0, hd)
            self.tl = fastuple(random.randint(*w_rand), random.randint(*h_rand))

    def encodes(self, x:Image.Image|TensorBBox|TensorPoint):
        return x.crop_pad(self.size, self.tl, orig_sz=self.orig_sz)
show_doc(RandomCrop)
RandomCrop
RandomCrop (size:int|tuple, **kwargs)
Randomly crop an image to size
Type | Details | |
---|---|---|
size | int | tuple | Size to crop to, duplicated if one value is specified |
class OldRandomCrop(CropPad):
    "Randomly crop an image to `size`"
    # NOTE(review): `CropPad` as defined in this file sets neither `orig_sz` nor `cp_size` and
    # defines no `before_call`; this legacy class looks non-functional - confirm before use.
    def before_call(self, b, split_idx):
        super().before_call(b, split_idx)
        w,h = self.orig_sz
        if not split_idx: self.tl = (random.randint(0,w-self.cp_size[0]), random.randint(0,h-self.cp_size[1]))
= plt.subplots(1,3,figsize=(12,4))
_,axs = RandomCrop(200)
f for ax in axs: show_image(f(img), ctx=ax);
在验证集上,我们进行中心裁剪。
= plt.subplots(1,3,figsize=(12,4))
_,axs for ax in axs: show_image(f(img, split_idx=1), ctx=ax);
= 25
large_sz = torch.empty(20, 16, 3).uniform_(0,255).type(torch.uint8)
t = PILImage.create(t)
x = RandomCrop(large_sz)
crop = crop(x, split_idx=0)
y
test_eq(y.size, (large_sz,large_sz))-crop.tl[1], :-crop.tl[0], :].sum(), 0)
test_eq(tensor(y)[:
= 10
small_sz = RandomCrop(small_sz)
crop = crop(x, split_idx=0)
y
test_eq(y.size, (small_sz,small_sz))1]:crop.tl[1]+small_sz,crop.tl[0]:crop.tl[0]+small_sz])
test_eq(tensor(array(y)), t[crop.tl[
=False
crop.as_item= TensorPoint(torch.tensor([[-1,-1], [-0.5,-0.5], [0.,0.]]))
pts = crop((x,pts), split_idx=0)
y,p1 +1) * tensor([1.6,2.]) - tensor(crop.tl).float()/5 - 1) test_eq(p1, (pts
#测试 这是验证集上的中心裁剪
= crop(x, split_idx=1)
y 10,10))
test_eq(y.size, (5:15,3:13]) test_eq(tensor(array(y)), t[
# Each attribute maps a capitalised name to its lower-case string value, e.g. ResizeMethod.Squish == 'squish'
mk_class('ResizeMethod', **{o:o.lower() for o in ['Squish', 'Crop', 'Pad']},
         doc="All possible resize method as attributes to get tab-completion and typo-proofing")
_all_ = ['ResizeMethod']
show_doc(ResizeMethod, title_level=3)
ResizeMethod
ResizeMethod (*args, **kwargs)
All possible resize method as attributes to get tab-completion and typo-proofing
'squish') test_eq(ResizeMethod.Squish,
调整大小 -
@delegates()
class Resize(RandTransform):
    "Resize image to `size` using `method`"
    # The docstring must be the first statement of the class body to become `Resize.__doc__`
    split_idx,mode,mode_mask,order = None,BILINEAR,NEAREST,1
    def __init__(self,
        size:int|tuple, # Size to resize to, duplicated if one value is specified
        method:ResizeMethod=ResizeMethod.Crop, # A `ResizeMethod`
        pad_mode:PadMode=PadMode.Reflection, # A `PadMode`
        resamples=(BILINEAR, NEAREST), # Pillow `Image` resamples mode, resamples[1] for mask
        **kwargs
    ):
        size = _process_sz(size)
        store_attr()
        super().__init__(**kwargs)
        self.mode,self.mode_mask = resamples

    def before_call(self,
        b,
        split_idx:int # Index of the train/valid dataset
    ):
        # Squish uses the whole image, so no crop position needs to be drawn
        if self.method==ResizeMethod.Squish: return
        # Relative position of the crop/pad window: centered when `split_idx` is truthy, else random
        self.pcts = (0.5,0.5) if split_idx else (random.random(),random.random())

    def encodes(self, x:Image.Image|TensorBBox|TensorPoint):
        orig_sz = _get_sz(x)
        if self.method==ResizeMethod.Squish:
            return x.crop_pad(orig_sz, fastuple(0,0), orig_sz=orig_sz, pad_mode=self.pad_mode,
                   resize_mode=self.mode_mask if isinstance(x,PILMask) else self.mode, resize_to=self.size)

        w,h = orig_sz
        # Pad keeps the larger of the two scale ratios (window >= image), Crop the smaller one
        op = (operator.lt,operator.gt)[self.method==ResizeMethod.Pad]
        m = w/self.size[0] if op(w/self.size[0],h/self.size[1]) else h/self.size[1]
        cp_sz = (int(m*self.size[0]),int(m*self.size[1]))
        tl = fastuple(int(self.pcts[0]*(w-cp_sz[0])), int(self.pcts[1]*(h-cp_sz[1])))
        return x.crop_pad(cp_sz, tl, orig_sz=orig_sz, pad_mode=self.pad_mode,
                   resize_mode=self.mode_mask if isinstance(x,PILMask) else self.mode, resize_to=self.size)
size
可以是一个整数(在这种情况下,图像将被调整为正方形)或者一个元组。根据 method
:
- 我们将任何矩形挤压到 size
- 我们调整大小,使得较短的维度匹配,并使用 pad_mode 进行填充
- 我们调整大小,使得较大的维度匹配并裁剪(在训练集上随机裁剪,在验证集上居中裁剪)
在进行调整大小时,我们对图像使用 resamples[0]
,对分割掩码使用 resamples[1]
。
= plt.subplots(1,3,figsize=(12,4))
_,axs for ax,method in zip(axs.flatten(), [ResizeMethod.Squish, ResizeMethod.Pad, ResizeMethod.Crop]):
= Resize(256, method=method)
rsz =0), ctx=ax, title=method); show_image(rsz(img, split_idx
在验证集上,裁剪总是中心裁剪(在被裁剪的维度上)。
= plt.subplots(1,3,figsize=(12,4))
_,axs for ax,method in zip(axs.flatten(), [ResizeMethod.Squish, ResizeMethod.Pad, ResizeMethod.Crop]):
= Resize(256, method=method)
rsz =1), ctx=ax, title=method); show_image(rsz(img, split_idx
= torch.empty(20,16).uniform_(0,1)
t = PILImage.create(t)
x = Resize(10)
rsz = rsz(x, split_idx=0)
y 10,10))
test_eq(y.size, (
= rsz(x, split_idx=1)
y 10,10)) test_eq(y.size, (
随机调整大小裁剪 -
@delegates()
class RandomResizedCrop(RandTransform):
    "Picks a random scaled crop of an image and resize it to `size`"
    split_idx,order = None,1
    def __init__(self,
        size:int|tuple, # Final size, duplicated if one value is specified
        min_scale:float=0.08, # Minimum scale of the crop, in relation to image area
        ratio=(3/4, 4/3), # Range of width over height of the output
        resamples=(BILINEAR, NEAREST), # Pillow `Image` resample mode, resamples[1] for mask
        val_xtra:float=0.14, # The ratio of size at the edge cropped out in the validation set
        max_scale:float=1., # Maximum scale of the crop, in relation to image area
        **kwargs
    ):
        size = _process_sz(size)
        store_attr()
        super().__init__(**kwargs)
        self.mode,self.mode_mask = resamples

    def before_call(self,
        b,
        split_idx # Index of the train/valid dataset
    ):
        w,h = self.orig_sz = _get_sz(b)
        if split_idx:
            # Validation: resize the full image to `size` plus a margin (rounded up to a multiple of 8);
            # `encodes` then center crops back to `size`
            xtra = math.ceil(max(*self.size[:2])*self.val_xtra/8)*8
            self.final_size = (self.size[0]+xtra, self.size[1]+xtra)
            self.tl,self.cp_size = (0,0),self.orig_sz
            return
        self.final_size = self.size
        # Try up to 10 times to draw an area/ratio pair whose crop fits inside the image
        for attempt in range(10):
            area = random.uniform(self.min_scale, self.max_scale) * w * h
            ratio = math.exp(random.uniform(math.log(self.ratio[0]), math.log(self.ratio[1])))
            nw = int(round(math.sqrt(area * ratio)))
            nh = int(round(math.sqrt(area / ratio)))
            if nw <= w and nh <= h:
                self.cp_size = (nw,nh)
                self.tl = random.randint(0,w-nw), random.randint(0,h - nh)
                return
        # Fallback: center crop with the aspect ratio clamped to the `ratio` range
        if   w/h < self.ratio[0]: self.cp_size = (w, int(w/self.ratio[0]))
        elif w/h > self.ratio[1]: self.cp_size = (int(h*self.ratio[1]), h)
        else:                     self.cp_size = (w, h)
        self.tl = ((w-self.cp_size[0])//2, (h-self.cp_size[1])//2)

    def encodes(self, x:Image.Image|TensorBBox|TensorPoint):
        res = x.crop_pad(self.cp_size, self.tl, orig_sz=self.orig_sz,
            resize_mode=self.mode_mask if isinstance(x,PILMask) else self.mode, resize_to=self.final_size)
        if self.final_size != self.size: res = res.crop_pad(self.size) # Validation set: one final center crop
        return res
裁剪区域的缩放比例在 (min_scale,max_scale)
范围内随机选取,宽高比在 ratio 范围内随机选取,然后使用 resamples[0]
对图像进行调整大小,使用 resamples[1]
对分割掩码进行调整大小。在验证集上,如果图像的比例不在范围内(达到最小值或最大值),则进行中心裁剪,然后调整大小。
= RandomResizedCrop(256)
crop = plt.subplots(3,3,figsize=(9,9))
_,axs for ax in axs.flatten():
= crop(img)
cropped =ax); show_image(cropped, ctx
256,256]) test_eq(cropped.shape, [
Squish用于验证集,首先去除每一侧的val_xtra
比例。
= subplots(1,3)
_,axs for ax in axs.flatten(): show_image(crop(img, split_idx=1), ctx=ax);
通过将max_scale
设置为较低的值,可以强制执行小裁剪。
= RandomResizedCrop(256, min_scale=0.05, max_scale=0.15)
small_crop = plt.subplots(3,3,figsize=(9,9))
_,axs for ax in axs.flatten():
= small_crop(img)
cropped =ax); show_image(cropped, ctx
256,256]) test_eq(cropped.shape, [
RatioResize -
class RatioResize(DisplayedTransform):
    'Resizes the biggest dimension of an image to `max_sz` maintaining the aspect ratio'
    order = 1
    def __init__(self,
        max_sz: int, # Biggest dimension of the resized image
        resamples=(BILINEAR, NEAREST), # Pillow `Image` resample mode, resamples[1] for mask
        **kwargs
    ):
        store_attr()
        super().__init__(**kwargs)

    def encodes(self, x:Image.Image|TensorBBox|TensorPoint):
        w,h = _get_sz(x)
        # Scale the larger side to `max_sz` and the other side by the same factor
        if w >= h: nw,nh = self.max_sz,h*self.max_sz/w
        else:      nw,nh = w*self.max_sz/h,self.max_sz
        # `Resize` takes its size as (nh, nw)
        return Resize(size=(int(nh),int(nw)), resamples=self.resamples)(x)
256)(img) RatioResize(
256)(img).size[0], 256)
test_eq(RatioResize(256)(img.dihedral(3)).size[1], 256) test_eq(RatioResize(
GPU上的仿射变换和坐标变换
# Float, channel-first copy of `img` scaled by 1/255
timg = TensorImage(array(img)).permute(2,0,1).float()/255.
def _batch_ex(bs):
    "Batch made of `bs` copies of `timg`"
    return TensorImage(timg[None].expand(bs, *timg.shape).clone())
def _init_mat(x):
= torch.eye(3, device=x.device).float()
mat return mat.unsqueeze(0).expand(x.size(0), 3, 3).contiguous()
仿射坐标变换 -
使用 coords
中的坐标将 x
中的坐标映射到新的位置,以进行如 flip
等变换。最好使用 TensorImage.affine_coord
,因为这将 _grid_sample
和 F.affine_grid
组合在一起,便于使用。使用 F.affine_grid
更容易生成 coords
,因为这通常是一个大的 [H,W,2]
,其中 H
和 W
是图像 x
的高度和宽度。
def _grid_sample(x, coords, mode='bilinear', padding_mode='reflection', align_corners=None):
"Resample pixels in `coords` from `x` by `mode`, with `padding_mode` in ('reflection','border','zeros')."
# #coords = coords.permute(0, 3, 1, 2).contiguous().permute(0, 2, 3, 1) 优化布局以适应grid_sample
if mode=='bilinear': # 获取更平滑向下采样的技巧
= coords.min(),coords.max()
mn,mx # max amount we're affine zooming by (>1 means zooming in)
= 1/(mx-mn).item()*2
z # amount we're resizing by, with 100% extra margin
= min(x.shape[-2]/coords.shape[-2], x.shape[-1]/coords.shape[-1])/2
d # If we're resizing up by >200%, and we're zooming less than that, interpolate first
if d>1 and d>z:
= F.interpolate(x, scale_factor=1/d, mode='area', recompute_scale_factor=True)
x return F.grid_sample(x, coords, mode=mode, padding_mode=padding_mode, align_corners=align_corners)
这是我们开始使用的图像,并将用于以下示例。
=torch.tensor([[[0,0,0],[1,0,0],[2,0,0]],
img0,1,0],[1,1,0],[2,1,0]],
[[0,2,0],[1,2,0],[2,2,0]]]).permute(2,0,1)[None]/2.
[[ show_images(img)
在这里我们使用 _grid_sample
,但不改变原始图像。注意 grid
中的坐标是如何映射到 img
中的坐标的。
=torch.tensor([[[[-1,-1],[0,-1],[1,-1]],
grid-1,0],[0,0],[1,0]],
[[-1,1],[0,1],[1,1.]]]])
[[=_grid_sample(img, grid,align_corners=True)
img show_images(img)
接下来,我们通过手动编辑网格来进行翻转。
=torch.tensor([[[1.,-1],[0,-1],[-1,-1]],
grid1,0],[0,0],[-1,0]],
[[1,1],[0,1],[-1,1]]])
[[=_grid_sample(img, grid[None],align_corners=True)
img show_images(img)
接下来,我们将图像向上移动一个位置。默认情况下,_grid_sample
使用反射填充。
=torch.tensor([[[[-1,0],[0,0],[1,0]],
grid-1,1],[0,1],[1,1]],
[[-1,2],[0,2],[1,2.]]]])
[[=_grid_sample(img, grid,align_corners=True)
img show_images(img)
affine_coord
使我们能够更轻松地处理图像,因为它允许我们指定远比网格更小的 mat
,而网格则要求我们为每个像素指定值。
def affine_grid(
    theta:Tensor, # Batch of affine transformation matrices
    size:tuple, # Output size
    align_corners:bool=None # PyTorch `F.grid_sample` align_corners
):
    " Generates `TensorFlowField` from a transformation affine matrices `theta`"
    return TensorFlowField(F.affine_grid(theta, size, align_corners=align_corners))
@patch
def affine_coord(x: TensorImage,
    mat:Tensor=None, # Batch of affine transformation matrices
    coord_tfm:callable=None, # Partial function of composable coordinate transforms
    sz:int|tuple=None, # Output size, duplicated if one value is specified
    mode:str='bilinear', # PyTorch `F.grid_sample` interpolation applied to `TensorImage`
    pad_mode=PadMode.Reflection, # Padding applied to `TensorImage`
    align_corners=True # PyTorch `F.grid_sample` align_corners
):
    "Apply affine and coordinate transforms to `TensorImage`"
    if mat is None and coord_tfm is None and sz is None: return x
    size = tuple(x.shape[-2:]) if sz is None else (sz,sz) if isinstance(sz,int) else tuple(sz)
    # With no matrix given, use the first two rows of the identity transform
    if mat is None: mat = _init_mat(x)[:,:2]
    coords = affine_grid(mat, x.shape[:2] + size, align_corners=align_corners)
    if coord_tfm is not None: coords = coord_tfm(coords)
    return TensorImage(_grid_sample(x, coords, mode=mode, padding_mode=pad_mode, align_corners=align_corners))
@patch
def affine_coord(x: TensorMask,
    mat:Tensor=None, # Batch of affine transformation matrices
    coord_tfm:callable=None, # Partial function of composable coordinate transforms
    sz:int|tuple=None, # Output size, duplicated if one value is specified
    mode='nearest', # PyTorch `F.grid_sample` interpolation applied to `TensorMask`
    pad_mode=PadMode.Reflection, # Padding applied to `TensorMask`
    align_corners=True # PyTorch `F.grid_sample` align_corners
):
    "Apply affine and coordinate transforms to `TensorMask`"
    # 3-d masks get a temporary dim at position 1 so the `TensorImage` implementation can be reused
    add_dim = (x.ndim==3)
    if add_dim: x = x[:,None]
    res = TensorImage.affine_coord(x.float(), mat, coord_tfm, sz, mode, pad_mode, align_corners).long()
    if add_dim: res = res[:,0]
    return TensorMask(res)
@patch
def affine_coord(x: TensorPoint,
    mat:Tensor=None, # Batch of affine transformation matrices
    coord_tfm=None, # Partial function of composable coordinate transforms
    sz=None, # Output size, duplicated if one value is specified
    mode='nearest', # PyTorch `F.grid_sample` interpolation applied to `TensorPoint`
    pad_mode=PadMode.Zeros, # Padding applied to `TensorPoint`
    align_corners=True # PyTorch `F.grid_sample` align_corners
):
    "Apply affine and coordinate transforms to `TensorPoint`"
    # Disabled check kept from the original source:
    #assert pad_mode==PadMode.Zeros, "Only zero padding is supported for `TensorPoint` and `TensorBBox`"
    if sz is None: sz = getattr(x, "img_size", None)
    if coord_tfm is not None: x = coord_tfm(x, invert=True)
    if mat is not None:
        mat = TensorPoint(mat)
        # Subtract the translation column, then multiply by the inverse of the transposed 2x2 linear part
        x = (x - mat[:,:,2].unsqueeze(1)) @ torch.inverse(mat[:,:,:2].transpose(1,2))
    return TensorPoint(x, sz=sz)
@patch
def affine_coord(x: TensorBBox,
    mat=None, # Batch of affine transformation matrices
    coord_tfm=None, # Partial function of composable coordinate transforms
    sz=None, # Output size, duplicated if one value is specified
    mode='nearest', # PyTorch `F.grid_sample` interpolation applied to `TensorBBox`
    pad_mode=PadMode.Zeros, # Padding applied to `TensorBBox`
    align_corners=True # PyTorch `F.grid_sample` align_corners
):
    "Apply affine and coordinate transforms to `TensorBBox`"
    if mat is None and coord_tfm is None: return x
    if sz is None: sz = getattr(x, "img_size", None)
    bs,n = x.shape[:2]
    # Expand every box into its 4 corners so they can be transformed as points
    pnts = stack([x[...,:2], stack([x[...,0],x[...,3]],dim=2),
                  stack([x[...,2],x[...,1]],dim=2), x[...,2:]], dim=2)
    pnts = TensorPoint(pnts.view(bs, 4*n, 2), img_size=sz).affine_coord(mat, coord_tfm, sz, mode, pad_mode)
    pnts = pnts.view(bs, n, 4, 2)
    # The new box is the axis-aligned hull of the 4 transformed corners
    tl,dr = pnts.min(dim=2)[0],pnts.max(dim=2)[0]
    return TensorBBox(torch.cat([tl, dr], dim=2), img_size=sz)
def _prepare_mat(x, mat):
= getattr(x, 'img_size', x.shape[-2:])
h,w 0,1] *= h/w
mat[:,1,0] *= w/h
mat[:,return mat[:,:2]
class AffineCoordTfm(RandTransform):
    "Combine and apply affine and coord transforms"
    order,split_idx = 30,None
    def __init__(self,
        aff_fs:callable|MutableSequence=None, # Affine transformations function for a batch
        coord_fs:callable|MutableSequence=None, # Coordinate transformations function for a batch
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        mode_mask='nearest', # Resample mode for mask
        align_corners=None, # PyTorch `F.grid_sample` align_corners
        **kwargs
    ):
        store_attr(but=['aff_fs','coord_fs'])
        super().__init__(**kwargs)
        self.aff_fs,self.coord_fs = L(aff_fs),L(coord_fs)
        self.cp_size = None if size is None else (size,size) if isinstance(size, int) else tuple(size)

    def before_call(self,
        b,
        split_idx, # Index of the train/valid dataset
    ):
        # Use the first (possibly nested) element of the batch to build the matrices
        while isinstance(b, tuple): b = b[0]
        self.split_idx = split_idx
        self.do,self.mat = True,self._get_affine_mat(b)
        for t in self.coord_fs: t.before_call(b)

    def compose(self, tfm):
        "Compose `self` with another `AffineCoordTfm` to only do the interpolation step once"
        # TODO: keep `name` up to date with the combination
        # TODO: have option to only show a subset of the attrs, e.g. for `Flip`
        self.aff_fs   += tfm.aff_fs
        self.coord_fs += tfm.coord_fs

    def _get_affine_mat(self, x):
        aff_m = _init_mat(x)
        # When `split_idx` is truthy, skip the affine functions and keep the identity
        if self.split_idx: return _prepare_mat(x, aff_m)
        ms = [f(x) for f in self.aff_fs]
        ms = [m for m in ms if m is not None]
        for m in ms: aff_m = aff_m @ m
        return _prepare_mat(x, aff_m)

    def _encode(self, x, mode, reverse=False):
        coord_func = None if len(self.coord_fs)==0 or self.split_idx else partial(compose_tfms, tfms=self.coord_fs, reverse=reverse)
        return x.affine_coord(self.mat, coord_func, sz=self.size, mode=mode, pad_mode=self.pad_mode, align_corners=self.align_corners)

    def encodes(self, x:TensorImage): return self._encode(x, self.mode)
    def encodes(self, x:TensorMask):  return self._encode(x, self.mode_mask)
    def encodes(self, x:TensorPoint|TensorBBox): return self._encode(x, self.mode, reverse=True)
调用 @patch
的 affine_coord
行为用于 TensorImage
、TensorMask
、TensorPoint
和 TensorBBox
在对与 size
相对应的基本网格上执行相应的仿射变换之前,先将 aff_fs
返回的所有矩阵相乘,然后在得到的坐标流上应用所有 coord_fs
,最后使用 mode
和 pad_mode
进行插值。
下面是如何在图像上使用 affine_coord
的示例。包括身份变换或原始图像、翻转,以及将图像向左移动。
=_batch_ex(3)
imgs=torch.tensor([[1,0,0],[0,1,0.]])
identity=torch.tensor([[-1,0,0],[0,1,0.]])
flip=torch.tensor([[1,0,1.],[0,1,0]])
translation=torch.stack((identity,flip,translation))
mats=PadMode.Zeros)) #最容易观察到的零点 show_images(imgs.affine_coord(mats,pad_mode
现在你可能会问:“这个 mat
是什么?”让我们快速看一下下面的标识。
=_batch_ex(1)
imgs=torch.tensor([[1,0,0],[0,1,0.]])
identity=identity[:,0:2]
eye=identity[:,2:3]
bi eye,bi
(tensor([[1., 0.],
[0., 1.]]),
tensor([[0.],
[0.]]))
注意张量’eye’是一个单位矩阵。如果我们将其与原始图像中的单个坐标x,y相乘,我们将简单地得到相同的x和y值。在这次乘法后添加bi。例如,让我们将图像翻转,使得左上角位于右上角:
=torch.tensor([[-1,0,0],[0,1,0.]])
t=t[:,0:2]
eye=t[:,2:3]
bi=torch.tensor([-1.,-1]) #左上角
xysum(xy*eye,dim=1)+bi[0] #现在右上角 torch.
tensor([ 1., -1.])
show_doc(AffineCoordTfm.compose)
AffineCoordTfm.compose
AffineCoordTfm.compose (tfm)
Compose self
with another AffineCoordTfm
to only do the interpolation step once
#测试训练集和验证集上的调整大小操作是否完成
= AffineCoordTfm(size=10)
tfm = TensorImage(torch.empty(2, 3, 20,16).uniform_(0,1))
t for i in [0,1]:
= tfm(t, split_idx=i)
y 2, 3, 10, 10]) test_eq(y.shape, [
随机调整大小裁剪GPU -
class RandomResizedCropGPU(RandTransform):
    "Picks a random scaled crop of an image and resize it to `size`"
    split_idx,order = None,30
    def __init__(self,
        size, # Final size, duplicated if one value is specified
        min_scale=0.08, # Minimum scale of the crop, in relation to image area
        ratio=(3/4, 4/3), # Range of width over height of the output
        mode='bilinear', # PyTorch `F.grid_sample` interpolation
        valid_scale=1., # Scale of the crop for the validation set, in relation to image area
        max_scale=1., # Maximum scale of the crop, in relation to image area
        mode_mask='nearest', # Interpolation mode for `TensorMask`
        **kwargs
    ):
        if isinstance(size, int): size = (size,size)
        store_attr()
        super().__init__(**kwargs)

    def before_call(self, b, split_idx):
        self.do = True
        h,w = fastuple((b[0] if isinstance(b, tuple) else b).shape[-2:])
        # Try up to 10 times to draw an area/ratio pair whose crop fits inside the image
        for attempt in range(10):
            if split_idx: break
            area = random.uniform(self.min_scale,self.max_scale) * w * h
            ratio = math.exp(random.uniform(math.log(self.ratio[0]), math.log(self.ratio[1])))
            nw = int(round(math.sqrt(area * ratio)))
            nh = int(round(math.sqrt(area / ratio)))
            if nw <= w and nh <= h:
                self.cp_size = (nh,nw)
                self.tl = random.randint(0,h - nh),random.randint(0,w-nw)
                return
        # Fallback (and validation path): center crop with the aspect ratio clamped to `ratio`
        if   w/h < self.ratio[0]: self.cp_size = (int(w/self.ratio[0]), w)
        elif w/h > self.ratio[1]: self.cp_size = (h, int(h*self.ratio[1]))
        else:                     self.cp_size = (h, w)
        if split_idx: self.cp_size = (int(self.cp_size[0]*self.valid_scale), int(self.cp_size[1]*self.valid_scale))
        self.tl = ((h-self.cp_size[0])//2,(w-self.cp_size[1])//2)

    def _encode(self, x, mode):
        # Slice the same window out of the last two dims, then resize to `size`
        x = x[...,self.tl[0]:self.tl[0]+self.cp_size[0], self.tl[1]:self.tl[1]+self.cp_size[1]]
        return x.affine_coord(sz=self.size, mode=mode)

    def encodes(self, x:TensorImage|TensorPoint|TensorBBox): return self._encode(x, self.mode)
    def encodes(self, x:TensorMask):                         return self._encode(x, self.mode_mask)
= _batch_ex(8)
t = RandomResizedCropGPU(224, p=1.)
rrc = rrc(t)
y = plt.subplots(2,4, figsize=(12,6))
_,axs for ax in axs.flatten():
=ax) show_image(y[i], ctx
= _batch_ex(2)
t = RandomResizedCropGPU(224, p=1., min_scale=0.05, max_scale=0.1)
rrc = rrc(t)
y = plt.subplots(2,4, figsize=(12,6))
_,axs for ax in axs.flatten():
=ax) show_image(y[i], ctx
RandomResizedCropGPU
在批处理中的所有图像使用相同的区域。
GPU 辅助工具
本节包含用于在GPU上处理增强的辅助工具,这些工具在整个代码中使用。
def mask_tensor(
    x:Tensor, # Input `Tensor`
    p=0.5, # Probability of not applying mask
    neutral=0., # Mask value
    batch=False # Apply identical mask to entire batch
):
    "Mask elements of `x` with `neutral` with probability `1-p`"
    if p==1.: return x
    if batch: return x if random.random() < p else x.new_zeros(*x.size()) + neutral
    # From here on `x` is modified in place: shift by `-neutral`, zero out masked entries, shift back
    if neutral != 0: x.add_(-neutral)
    # Extra casting to float and long to prevent crashes on mps accelerator (issue #3911)
    mask = x.new_empty(*x.size()).float().bernoulli_(p).long()
    x.mul_(mask)
    return x.add_(neutral) if neutral != 0 else x
让我们来看看mask_tensor
可能如何使用的一些示例,我们使用clone()
是因为这个操作会覆盖输入。对于这个示例,我们尝试使用度数来旋转图像。
with no_random():
=torch.tensor([60,-30,90,-210,270,-180,120,-240,150])
xprint('p=0.5: ',mask_tensor(x.clone()))
print('p=1.0: ',mask_tensor(x.clone(),p=1.))
print('p=0.0: ',mask_tensor(x.clone(),p=0.))
p=0.5: tensor([ 60, -30, 90, -210, 0, -180, 0, 0, 150])
p=1.0: tensor([ 60, -30, 90, -210, 270, -180, 120, -240, 150])
p=0.0: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0])
注意到 p
控制一个值被替换为 0 的可能性,或者保持不变,因为 0 度旋转就只是原始图像。batch
作用于整个批次,而不是批次中的单个元素。现在让我们考虑一个不同的例子,即处理亮度。注意:亮度为 0 时是完全黑色的图像。
=torch.tensor([0.6,0.4,0.3,0.7,0.4])
xprint('p=0.: ',mask_tensor(x.clone(),p=0))
print('p=0.,neutral=0.5: ',mask_tensor(x.clone(),p=0,neutral=0.5))
p=0.: tensor([0., 0., 0., 0., 0.])
p=0.,neutral=0.5: tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000])
在这里,如果我们有一幅完全黑色的图像,那将是非常糟糕的,因为这不是一幅未改变的图像。相反,我们将 neutral
设置为 0.5,这个值是未改变图像的亮度值。
_draw_mask
用于支持许多后续变换的API,以创建mask_tensor
。(p, neutral, batch)
将传递给mask_tensor
。def_draw
是默认的绘制函数,当未提供自定义用户设置时将执行此函数。draw
是用户定义的行为,可以是一个函数、浮点数列表或单个浮点数。draw
和def_draw
必须返回一个张量。
def _draw_mask(x, def_draw, draw=None, p=0.5, neutral=0., batch=False):
    "Creates mask_tensor based on `x` with `neutral` with probability `1-p`. "
    if draw is None: draw=def_draw
    if callable(draw): res=draw(x)
    elif is_listy(draw):
        # A list must supply at least one value per item of the batch; extra values are ignored
        assert len(draw)>=x.size(0)
        res = tensor(draw[:x.size(0)], dtype=x.dtype, device=x.device)
    # Any other value is treated as a constant applied to each item
    else: res = x.new_zeros(x.size(0)) + draw
    return TensorBase(mask_tensor(res, p=p, neutral=neutral, batch=batch))
在这里,我们使用从1到8的随机整数作为我们的def_draw
,这个例子与Dihedral
非常相似。
= torch.zeros(10,2,3)
x def def_draw(x):
=torch.randint(1,8, (x.size(0),))
xreturn x
with no_random(): print(torch.randint(1,8, (x.size(0),)))
with no_random(): print(_draw_mask(x, def_draw))
tensor([2, 3, 5, 6, 5, 4, 6, 6, 1, 1])
TensorBase([2, 0, 0, 6, 5, 4, 6, 0, 0, 1])
接下来,有三种方式来定义 draw
,作为常量、作为列表,以及作为函数。所有这些都覆盖了 def_draw
,因此它对最终结果没有影响。
with no_random():
print('const: ',_draw_mask(x, def_draw, draw=1))
print('list : ', _draw_mask(x, def_draw, draw=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
print('list : ',_draw_mask(x[0:2], def_draw, draw=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]))
print('funct: ',_draw_mask(x, def_draw, draw=lambda x: torch.arange(1,x.size(0)+1)))
try:
=[1,2])
_draw_mask(x, def_draw, drawexcept AssertionError as e:
print(type(e),'\n',e)
const: TensorBase([1., 1., 1., 1., 0., 1., 0., 0., 1., 1.])
list : TensorBase([ 1., 2., 0., 0., 5., 0., 7., 0., 0., 10.])
list : TensorBase([1., 0.])
funct: TensorBase([ 1, 2, 3, 4, 0, 6, 7, 8, 9, 10])
<class 'AssertionError'>
注意,当使用列表时,它的大小可以大于批量大小,但不能小于批量大小。否则,将没有足够的增强来处理批次中的元素。
= torch.zeros(5,2,3)
x = lambda x: torch.randint(0,8, (x.size(0),))
def_draw = _draw_mask(x, def_draw)
t assert (0. <= t).all() and (t <= 7).all()
= _draw_mask(x, def_draw, 1)
t assert (0. <= t).all() and (t <= 1).all()
1, p=1), tensor([1.,1,1,1,1]))
test_eq(_draw_mask(x, def_draw, 0,1,2,3,4], p=1), tensor([0.,1,2,3,4]))
test_eq(_draw_mask(x, def_draw, [0:3], def_draw, [0,1,2,3,4], p=1), tensor([0.,1,2]))
test_eq(_draw_mask(x[for i in range(5):
= _draw_mask(x, def_draw, 1,batch=True)
t assert (t==torch.zeros(5)).all() or (t==torch.ones(5)).all()
翻转/二面角 GPU 辅助函数
affine_mat
用于将长度为6的向量转换为形状为 [bs,3,3] 的张量。这使我们能够组合仿射变换。
def affine_mat(*ms):
    "Restructure length-6 vector `ms` into an affine matrix with 0,0,1 in the last line"
    # Each of the six entries is a 1-d tensor; rows are stacked along dim 1, so the
    # result has one 3x3 matrix per element of `ms[0]`.
    return stack([stack([ms[0], ms[1], ms[2]], dim=1),
                  stack([ms[3], ms[4], ms[5]], dim=1),
                  stack([t0(ms[0]), t0(ms[0]), t1(ms[0])], dim=1)], dim=1)
这是使用 affine_mat
翻转图像的示例。
flips=torch.tensor([-1,1,-1])
ones=t1(flips)
zeroes=t0(flips)
# -1 in the top-left entry flips an image horizontally, 1 leaves it unchanged
affines=affine_mat(flips,zeroes,zeroes,zeroes,ones,zeroes)
print(affines)
tensor([[[-1, 0, 0],
[ 0, 1, 0],
[ 0, 0, 1]],
[[ 1, 0, 0],
[ 0, 1, 0],
[ 0, 0, 1]],
[[-1, 0, 0],
[ 0, 1, 0],
[ 0, 0, 1]]])
这是为了让我们能够合并多个仿射变换,而不必对整个图像进行数学计算。我们需要这些矩阵的尺寸相同,以便能够进行矩阵乘法来合并仿射变换。虽然通常是在整个批处理上进行的,但这是对单个图像进行多个翻转变换的样子。由于我们翻转了两次,因此最终得到的仿射矩阵将简单地返回我们的原始图像。
如果您想了解更多关于此如何工作的内容,请参见 affine_coord
。
x = torch.eye(3,dtype=torch.int64)
# Compose the affine matrices one at a time by matrix multiplication
for affine in affines:
    x @= affine
    print(x)
tensor([[-1, 0, 0],
[ 0, 1, 0],
[ 0, 0, 1]])
tensor([[-1, 0, 0],
[ 0, 1, 0],
[ 0, 0, 1]])
tensor([[1, 0, 0],
[0, 1, 0],
[0, 0, 1]])
flip_mat
将生成一个 [bs,3,3] 的张量,表示我们对一个批次的翻转操作,其概率为 p
。draw
可以用来定义一个函数、常量或列表,指定使用哪些翻转操作。如果 draw
是一个列表,则长度必须大于或等于批次大小。对于 draw
,0 表示原始图像,1 表示翻转后的图像。batch
表示整个批次将被翻转或不翻转。
def flip_mat(
    x:Tensor, # The input Tensor
    p=0.5, # Probability of applying transformation
    draw:int|MutableSequence|callable=None, # Custom flips instead of random
    batch:bool=False # Apply identical flip to entire batch
):
    "Return a random flip matrix"
    def _def_draw(x): return x.new_ones(x.size(0))
    # `_draw_mask` yields 1 where a flip is drawn and 0 otherwise, so `mask` is -1 for
    # flipped items and +1 for untouched ones.
    mask = x.new_ones(x.size(0)) - 2*_draw_mask(x, _def_draw, draw=draw, p=p, batch=batch)
    return affine_mat(mask,     t0(mask), t0(mask),
                      t0(mask), t1(mask), t0(mask))
以下是一些如何使用draw作为常量、列表和函数的示例。
with no_random():
    x=torch.randn(2,4,3)
    print('const: ',flip_mat(x, draw=1))
    print('list : ', flip_mat(x, draw=[1, 0]))
    print('list : ',flip_mat(x[0:2], draw=[1, 0, 1, 0, 1]))
    print('funct: ',flip_mat(x, draw=lambda x: torch.ones(x.size(0))))
    # A `draw` list shorter than the batch must fail
    test_fail(lambda: flip_mat(x, draw=[1]))
const: TensorBase([[[-1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]],
[[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]]])
list : TensorBase([[[-1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]],
[[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]]])
list : TensorBase([[[-1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]],
[[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]]])
funct: TensorBase([[[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]],
[[-1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]]])
x = flip_mat(torch.randn(100,4,3))
test_eq(set(x[:,0,0].numpy()), {-1,1}) #might fail with probability 2*2**(-100) (picked only 1s or -1s)
def _get_default(x, mode=None, pad_mode=None):
    "Fill in `mode` and `pad_mode` from the type of `x` when they are `None`; also return `x` (or its first element if a tuple)"
    # The original ternary returned 'bilinear' on both branches. Masks hold class
    # indices, so they must not be blended by interpolation: use 'nearest' for them.
    if mode is None: mode='nearest' if isinstance(x, TensorMask) else 'bilinear'
    if pad_mode is None: pad_mode=PadMode.Zeros if isinstance(x, (TensorPoint, TensorBBox)) else PadMode.Reflection
    x0 = x[0] if isinstance(x, tuple) else x
    return x0,mode,pad_mode
翻转 -
水平翻转图像、掩膜、点和边界框。p
是应用翻转的概率。draw
可用于定义自定义翻转行为。
@patch
def flip_batch(x: TensorImage|TensorMask|TensorPoint|TensorBBox,
    p=0.5, # Probability of applying flip
    draw:int|MutableSequence|callable=None, # Custom flips instead of random
    size:int|tuple=None, # Output size, duplicated if one value is specified
    mode=None, # PyTorch `F.grid_sample` interpolation applied to `x`
    pad_mode=None, # Padding applied to `x`
    align_corners=True, # PyTorch `F.grid_sample` align_corners
    batch=False # Apply identical flip to entire batch
):
    "Flip the items of batch `x`, using `flip_mat` to build the affine matrices"
    x0,mode,pad_mode = _get_default(x, mode, pad_mode)
    mat=flip_mat(x0, p=p, draw=draw, batch=batch)
    # Only the first two rows of each 3x3 matrix are passed on
    return x.affine_coord(mat=mat[:,:2], sz=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
t = _pnt2tensor([[1,0], [2,1]], (3,3))
y = TensorImage(t[None,None]).flip_batch(p=1.)
test_eq(y, _pnt2tensor([[1,0], [0,1]], (3,3))[None,None])

pnts = TensorPoint((tensor([[1.,0.], [2,1]]) -1)[None])
test_eq(pnts.flip_batch(p=1.), tensor([[[1.,0.], [0,1]]]) -1)

bbox = TensorBBox(((tensor([[1.,0., 2.,1]]) -1)[None]))
test_eq(bbox.flip_batch(p=1.), tensor([[[0.,0., 1.,1.]]]) -1)
class Flip(AffineCoordTfm):
    "Randomly flip a batch of images with a probability `p`"
    def __init__(self,
        p=0.5, # Probability of applying flip
        draw:int|MutableSequence|callable=None, # Custom flips instead of random
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        align_corners=True, # PyTorch `F.grid_sample` align_corners
        batch=False # Apply identical flip to entire batch
    ):
        aff_fs = partial(flip_mat, p=p, draw=draw, batch=batch)
        super().__init__(aff_fs, size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners, p=p)
调用 @patch
的 flip_batch
行为用于 TensorImage
、TensorMask
、TensorPoint
和 TensorBBox
以下是使用 flip 的一些示例。请注意,常量 draw=1
实际上与默认设置相同。同时请注意,通过将 p=1.
并定义自定义 draw,我们可以在第三个示例中获得更细致的控制。
with no_random(32):
    imgs = _batch_ex(5)
    deflt = Flip()
    const = Flip(p=1.,draw=1) #same as default
    listy = Flip(p=1.,draw=[1,0,1,0,1]) #completely manual!!!
    funct = Flip(draw=lambda x: torch.ones(x.size(0))) #same as default

    show_images( deflt(imgs) ,suptitle='Default Flip')
    show_images( const(imgs) ,suptitle='Constant Flip',titles=[f'Flipped' for i in['','','','','']]) #same as above
    show_images( listy(imgs) ,suptitle='Listy Flip',titles=[f'{i}Flipped' for i in ['','Not ','','Not ','']])
    show_images( funct(imgs) ,suptitle='Flip By Function') #same as default
flip = Flip(p=1.)
t = _pnt2tensor([[1,0], [2,1]], (3,3))

y = flip(TensorImage(t[None,None]), split_idx=0)
test_eq(y, _pnt2tensor([[1,0], [0,1]], (3,3))[None,None])

pnts = TensorPoint((tensor([[1.,0.], [2,1]]) -1)[None])
test_eq(flip(pnts, split_idx=0), tensor([[[1.,0.], [0,1]]]) -1)

bbox = TensorBBox(((tensor([[1.,0., 2.,1]]) -1)[None]))
test_eq(flip(bbox, split_idx=0), tensor([[[0.,0., 1.,1.]]]) -1)
class DeterministicDraw():
    "Callable `draw` that cycles through `vals`, returning the next value (for the whole batch) on each call"
    def __init__(self, vals): self.vals,self.count = vals,-1

    def __call__(self, x):
        # `count` starts at -1 so the first call uses `vals[0]`
        self.count += 1
        return x.new_zeros(x.size(0)) + self.vals[self.count%len(self.vals)]
t =  _batch_ex(8)
draw = DeterministicDraw(list(range(8)))
for i in range(15): test_eq(draw(t), torch.zeros(8)+(i%8))
class DeterministicFlip(Flip):
    "Flip the batch every other call"
    def __init__(self,
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        align_corners=True, # PyTorch `F.grid_sample` align_corners
        **kwargs
    ):
        # `size` used to be accepted but never forwarded, so it was silently ignored
        super().__init__(p=1., draw=DeterministicDraw([0,1]), size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners, **kwargs)
接下来,我们将循环遍历示例图像的多个批次。DeterministicFlip 首先不翻转图像,然后在下一个批次中将翻转图像。
b = _batch_ex(2)
dih = DeterministicFlip()
for i,flipped in enumerate(['Not Flipped','Flipped']*2):
    show_images(dih(b),suptitle=f'Batch {i}',titles=[flipped]*2)
二面角 -
由于我们正在处理正方形和长方形,我们可以将二面翻转视为沿水平方向、垂直方向和对角线及其组合的翻转。不过请记住,长方形在对角线方向上并不是对称的,因此这将有效地裁剪部分长方形。
def dihedral_mat(
    x:Tensor, # Input `Tensor`
    p:float=0.5, # Probability forwarded to `_draw_mask`
    draw:int|MutableSequence|callable=None, # Custom dihedrals instead of random
    batch:bool=False # Apply identical dihedral to entire batch
):
    "Return a random dihedral matrix"
    def _def_draw(x):   return torch.randint(0,8, (x.size(0),), device=x.device)
    def _def_draw_b(x): return random.randint(0,7) + x.new_zeros((x.size(0),)).long()
    idx = _draw_mask(x, _def_draw_b if batch else _def_draw, draw=draw, p=p, batch=batch).long()
    # Four 8-entry lookup tables indexed by the drawn transform id: `xs`/`ys` hold the
    # signs, `m0`/`m1` choose between the diagonal and anti-diagonal layout.
    xs = tensor([1,-1,1,-1,-1,1,1,-1], device=x.device).gather(0, idx)
    ys = tensor([1,1,-1,1,-1,-1,1,-1], device=x.device).gather(0, idx)
    m0 = tensor([1,1,1,0,1,0,0,0], device=x.device).gather(0, idx)
    m1 = tensor([0,0,0,1,0,1,1,1], device=x.device).gather(0, idx)
    return affine_mat(xs*m0,  xs*m1,  t0(xs),
                      ys*m1,  ys*m0,  t0(xs)).float()
@patch
def dihedral_batch(x: TensorImage|TensorMask|TensorPoint|TensorBBox,
    p=0.5, # Probability of applying dihedral
    draw:int|MutableSequence|callable=None, # Custom dihedrals instead of random
    size:int|tuple=None, # Output size, duplicated if one value is specified
    mode:str='bilinear', # PyTorch `F.grid_sample` interpolation applied to `x`
    pad_mode=None, # Padding applied to `x`
    batch=False, # Apply identical dihedral to entire batch
    align_corners=True # PyTorch `F.grid_sample` align_corners
):
    "Apply a dihedral transform to batch `x`, using `dihedral_mat` to build the affine matrices"
    x0,mode,pad_mode = _get_default(x, mode, pad_mode)
    mat = _prepare_mat(x, dihedral_mat(x0, p=p, draw=draw, batch=batch))
    return x.affine_coord(mat=mat, sz=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
class Dihedral(AffineCoordTfm):
    "Apply a random dihedral transformation to a batch of images with a probability `p`"
    def __init__(self,
        p=0.5, # Probability of applying dihedral
        draw:int|MutableSequence|callable=None, # Custom dihedrals instead of random
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        batch=False, # Apply identical dihedral to entire batch
        align_corners=True # PyTorch `F.grid_sample` align_corners
    ):
        f = partial(dihedral_mat, p=p, draw=draw, batch=batch)
        super().__init__(aff_fs=f, size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
调用 @patch
修饰的 dihedral_batch
行为,用于 TensorImage
、TensorMask
、TensorPoint
和 TensorBBox
draw
可以被指定用于自定义在应用变换时选择哪个翻转(默认是介于 0 和 7 之间的随机数)。它可以是一个介于 0 和 7 之间的整数,一个这样的整数列表(该列表的长度应等于或大于批量的大小),或者一个返回介于 0 和 7 之间的长整型张量的可调用对象。
with no_random():
    imgs = _batch_ex(5)
    deflt = Dihedral()
    const = Dihedral(p=1.,draw=1) #same as flip_batch
    listy = Dihedral(p=1.,draw=[0,1,2,3,4]) #completely manual!!!
    funct = Dihedral(draw=lambda x: torch.randint(0,8,(x.size(0),))) #same as default

    show_images( deflt(imgs) ,suptitle='Default Flips',titles=[i for i in range(imgs.size(0))])
    show_images( const(imgs) ,suptitle='Constant Horizontal Flip',titles=[f'Flip 1' for i in [0,1,1,1,1]])
    show_images( listy(imgs) ,suptitle='Manual Listy Flips',titles=[f'Flip {i}' for i in [0,1,2,3,4]]) #manually specified, not random!
    show_images( funct(imgs) ,suptitle='Default Functional Flips',titles=[i for i in range(imgs.size(0))]) #same as default
#Test behavior consistency between images and points
t = _pnt2tensor([[1,0], [2,1], [2,2]], (3,3))[None,None].expand(8,1,3,3)
dih = Dihedral(p=1., draw=list(range(8)))
dih.as_item=False
pnts = tensor([[[1.,0.], [2,1], [2,2]]]) -1
y,z = dih((TensorImage(t),TensorPoint(pnts.expand(8,3,2))), split_idx=0)
res = y.nonzero()
for i in range(8):
    vals = {(t[0] + t[1]*3).item() for t in res[i*3:(i+1)*3][:,2:].flip(1)}
    vals1 = {(t[0] + t[1]*3).item() for t in z[i]+1}
    test_eq(vals, vals1)

#Test order is the same as the item-level dihedral
tt = _pnt2tensor([[1,0], [2,1], [2,2]], (3,3))
x = PILImage.create(tt)
for i in range(8):
    y1 = x.dihedral(i)
    test_eq(y[i,0], tensor(array(y1)))
class DeterministicDihedral(Dihedral):
    "Cycle through the 8 dihedral transforms in order, one per call"
    def __init__(self,
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        align_corners=None # PyTorch `F.grid_sample` align_corners
    ):
        # NOTE(review): `align_corners` defaults to None here while `Dihedral` uses True;
        # the default is kept for backward compatibility - confirm whether it is intended.
        # `size` and `mode` used to be accepted but never forwarded to `Dihedral`.
        super().__init__(p=1., draw=DeterministicDraw(list(range(8))), size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
DeterministicDihedral
确保第一次调用不会被翻转,然后后续调用将按照确定性顺序翻转。在进行完所有 7 种可能的二面翻转后,模式将重置为未翻转版本。如果我们在批量大小为 1 的情况下进行此操作,它看起来会是这样的:
t = _batch_ex(10)
dih = DeterministicDihedral()
_,axs = plt.subplots(2,5, figsize=(14,6))
for i,ax in enumerate(axs.flatten()):
    y = dih(t)
    show_image(y[0], ctx=ax, title=f'Batch {i}')
旋转 -
def rotate_mat(
    x:Tensor, # Input `Tensor`
    max_deg:int=10, # Maximum degree of rotation
    p:float=0.5, # Probability of applying rotate
    draw:int|MutableSequence|callable=None, # Custom rotation instead of random
    batch:bool=False # Apply identical rotation to entire batch
):
    "Return a random rotation matrix with `max_deg` and `p`"
    def _def_draw(x):   return x.new_empty(x.size(0)).uniform_(-max_deg, max_deg)
    def _def_draw_b(x): return x.new_zeros(x.size(0)) + random.uniform(-max_deg, max_deg)
    # Angles are drawn in degrees and converted to radians here
    thetas = _draw_mask(x, _def_draw_b if batch else _def_draw, draw=draw, p=p, batch=batch) * math.pi/180
    return affine_mat(thetas.cos(), thetas.sin(), t0(thetas),
                     -thetas.sin(), thetas.cos(), t0(thetas))
@patch
@delegates(rotate_mat)
def rotate(x: TensorImage|TensorMask|TensorPoint|TensorBBox,
    size:int|tuple=None, # Output size, duplicated if one value is specified
    mode:str=None, # PyTorch `F.grid_sample` interpolation applied to `x`
    pad_mode=None, # Padding applied to `x`
    align_corners:bool=True, # PyTorch `F.grid_sample` align_corners
    **kwargs
):
    "Rotate the items of batch `x`; extra `kwargs` are forwarded to `rotate_mat`"
    x0,mode,pad_mode = _get_default(x, mode, pad_mode)
    mat = _prepare_mat(x, rotate_mat(x0, **kwargs))
    return x.affine_coord(mat=mat, sz=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
class Rotate(AffineCoordTfm):
    "Apply a random rotation of at most `max_deg` with probability `p` to a batch of images"
    def __init__(self,
        max_deg:int=10, # Maximum degree of rotation
        p:float=0.5, # Probability of applying rotate
        draw:int|MutableSequence|callable=None, # Custom rotation instead of random
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        align_corners:bool=True, # PyTorch `F.grid_sample` align_corners
        batch:bool=False # Apply identical rotation to entire batch
    ):
        aff_fs = partial(rotate_mat, max_deg=max_deg, p=p, draw=draw, batch=batch)
        super().__init__(aff_fs=aff_fs, size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
调用 @patch
装饰的 rotate
行为用于 TensorImage
、TensorMask
、TensorPoint
和 TensorBBox
draw
可以指定,如果你想自定义在应用变换时选择哪个角度(默认是在-max_deg
和max_deg
之间的随机浮点数)。它可以是一个浮点数,一个浮点数列表(列表的长度应该等于或大于批次的大小),或一个返回浮点张量的可调用对象。
默认情况下,旋转只能旋转10度,这使得变化不容易被看出。这通常与flip
或dihedral
结合使用,后者默认情况下会产生更大的变化。例如,旋转180度等同于同时进行垂直翻转和水平翻转。
with no_random():
    thetas = [-30,-15,0,15,30]
    imgs = _batch_ex(5)
    deflt = Rotate()
    const = Rotate(p=1.,draw=180) #180-degree rotation (vertical plus horizontal flip)
    listy = Rotate(p=1.,draw=[-30,-15,0,15,30]) #completely manual!!!
    funct = Rotate(draw=lambda x: x.new_empty(x.size(0)).uniform_(-10, 10)) #same as default

    show_images( deflt(imgs) ,suptitle='Default Rotate, notice the small rotation',titles=[i for i in range(imgs.size(0))])
    show_images( const(imgs) ,suptitle='Constant 180 Rotate',titles=[f'180 Degrees' for i in range(imgs.size(0))])
    #manually specified, not random!
    show_images( listy(imgs) ,suptitle='Manual List Rotate',titles=[f'{i} Degrees' for i in [-30,-15,0,15,30]])
    #same as default
    show_images( funct(imgs) ,suptitle='Default Functional Rotate',titles=[i for i in range(imgs.size(0))])
放大 -
def zoom_mat(
    x:Tensor, # Input `Tensor`
    min_zoom:float=1., # Minimum zoom
    max_zoom:float=1.1, # Maximum zoom
    p:float=0.5, # Probability of applying zoom
    draw:float|MutableSequence|callable=None, # User defined scale of the zoom
    draw_x:float|MutableSequence|callable=None, # User defined center of the zoom in x
    draw_y:float|MutableSequence|callable=None, # User defined center of the zoom in y
    batch:bool=False # Apply identical zoom to entire batch
):
    "Return a random zoom matrix with `max_zoom` and `p`"
    def _def_draw(x):       return x.new_empty(x.size(0)).uniform_(min_zoom, max_zoom)
    def _def_draw_b(x):     return x.new_zeros(x.size(0)) + random.uniform(min_zoom, max_zoom)
    def _def_draw_ctr(x):   return x.new_empty(x.size(0)).uniform_(0,1)
    def _def_draw_ctr_b(x): return x.new_zeros(x.size(0)) + random.uniform(0,1)
    assert(min_zoom<=max_zoom)
    # The matrix uses the inverse of the drawn zoom; neutral=1. means "no zoom"
    s = 1/_draw_mask(x, _def_draw_b if batch else _def_draw, draw=draw, p=p, neutral=1., batch=batch)
    def_draw_c = _def_draw_ctr_b if batch else _def_draw_ctr
    # The center is always drawn (p=1.), as a fraction in [0,1] along each axis
    col_pct = _draw_mask(x, def_draw_c, draw=draw_x, p=1., batch=batch)
    row_pct = _draw_mask(x, def_draw_c, draw=draw_y, p=1., batch=batch)
    col_c = (1-s) * (2*col_pct - 1)
    row_c = (1-s) * (2*row_pct - 1)
    return affine_mat(s,     t0(s), col_c,
                      t0(s), s,     row_c)
@patch
@delegates(zoom_mat)
def zoom(x: TensorImage|TensorMask|TensorPoint|TensorBBox,
    size:int|tuple=None, # Output size, duplicated if one value is specified
    mode:str='bilinear', # PyTorch `F.grid_sample` interpolation applied to `x`
    pad_mode=PadMode.Reflection, # Padding applied to `x`
    align_corners:bool=True, # PyTorch `F.grid_sample` align_corners
    **kwargs
):
    "Zoom the items of batch `x`; extra `kwargs` are forwarded to `zoom_mat`"
    x0,mode,pad_mode = _get_default(x, mode, pad_mode)
    return x.affine_coord(mat=zoom_mat(x0, **kwargs)[:,:2], sz=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
class Zoom(AffineCoordTfm):
    "Apply a random zoom of at most `max_zoom` with probability `p` to a batch of images"
    def __init__(self,
        min_zoom:float=1., # Minimum zoom
        max_zoom:float=1.1, # Maximum zoom
        p:float=0.5, # Probability of applying zoom
        draw:float|MutableSequence|callable=None, # User defined scale of the zoom
        draw_x:float|MutableSequence|callable=None, # User defined center of the zoom in x
        draw_y:float|MutableSequence|callable=None, # User defined center of the zoom in y
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        batch=False, # Apply identical zoom to entire batch
        align_corners=True # PyTorch `F.grid_sample` align_corners
    ):
        aff_fs = partial(zoom_mat, min_zoom=min_zoom, max_zoom=max_zoom, p=p, draw=draw, draw_x=draw_x, draw_y=draw_y, batch=batch)
        super().__init__(aff_fs, size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
调用 @patch
的 zoom
行为用于 TensorImage
、TensorMask
、TensorPoint
和 TensorBBox
。
draw
、draw_x
和 draw_y
可以被指定,如果你想自定义在应用变换时选择哪个比例和中心(默认情况下,draw
为 min_zoom(默认为 1)到 max_zoom
之间的随机浮点数,draw_x
和 draw_y
在 0 到 1 之间)。每个参数可以是一个浮点数,一个浮点数列表(此时列表长度应等于或大于批次大小),或者是一个返回浮点张量的可调用对象。
draw_x
和 draw_y
被期望为中心的位置百分比,0 表示最左/最上,1 表示最右/最下。
注意:默认情况下,缩放较小。
with no_random():
    scales = [0.8, 1., 1.1, 1.25, 1.5]
    imgs = _batch_ex(5)
    deflt = Zoom()
    const = Zoom(p=1., draw=1.5) #'Constant scale and different random centers'
    listy = Zoom(p=1.,draw=scales,draw_x=0.5, draw_y=0.5) #completely manual scales, constant center
    funct = Zoom(draw=lambda x: x.new_empty(x.size(0)).uniform_(1., 1.1)) #same as default

    show_images( deflt(imgs) ,suptitle='Default Zoom, note the small zooming', titles=[i for i in range(imgs.size(0))])
    show_images( const(imgs) ,suptitle='Constant Scale, Valiable Position', titles=[f'Scale 1.5x' for i in range(imgs.size(0))])
    show_images( listy(imgs) ,suptitle='Manual Listy Scale, Centered', titles=[f'Scale {i}x' for i in scales])
    show_images( funct(imgs) ,suptitle='Default Functional Zoom', titles=[i for i in range(imgs.size(0))]) #same as default
变形
def solve(A,B):
    "Solve the linear system `A @ X = B` with `torch.linalg.solve`"
    return torch.linalg.solve(A,B)
def find_coeffs(
    p1:Tensor, # Original points
    p2:Tensor, # Target points
):
    "Find coefficients for warp tfm from `p1` to `p2`"
    m = []
    p = p1[:,0,0]  # only used as a template for the zeros/ones columns
    #The equations we'll need to solve: two rows per point, one for x and one for y.
    for i in range(p1.shape[1]):
        m.append(stack([p2[:,i,0], p2[:,i,1], t1(p), t0(p), t0(p), t0(p), -p1[:,i,0]*p2[:,i,0], -p1[:,i,0]*p2[:,i,1]]))
        m.append(stack([t0(p), t0(p), t0(p), p2[:,i,0], p2[:,i,1], t1(p), -p1[:,i,1]*p2[:,i,0], -p1[:,i,1]*p2[:,i,1]]))
    #The 8 scalars we seek are solution of AX = B
    A = stack(m).permute(2, 0, 1)
    B = p1.view(p1.shape[0], 8, 1)
    return solve(A,B)
def apply_perspective(
    coords:Tensor, # Original coordinates
    coeffs:Tensor # Warping transformation matrix
):
    "Apply perspective transform on `coords` with `coeffs`"
    sz = coords.shape
    coords = coords.view(sz[0], -1, 2)
    # Append a 1 to the 8 coefficients to form one 3x3 matrix per batch item
    coeffs = torch.cat([coeffs, t1(coeffs[:,:1])], dim=1).view(coeffs.shape[0], 3,3)
    coords1 = coords @ coeffs[...,:2].transpose(1,2) + coeffs[...,2].unsqueeze(1)
    # If any third component is zero the division below is undefined: return the input unwarped
    if (coords1[...,2]==0.).any(): return coords[...,:2].view(*sz)
    coords = coords1/coords1[...,2].unsqueeze(-1)
    return coords[...,:2].view(*sz)
class _WarpCoord():
    "Coordinate transform that draws per-item warp magnitudes in `before_call` and applies the perspective warp in `__call__`"
    def __init__(self, magnitude=0.2, p=0.5, draw_x=None, draw_y=None, batch=False):
        store_attr()
        self.coeffs = None

    def _def_draw(self, x):
        # One magnitude per item, or a single shared one when `batch` is set
        if not self.batch: return x.new_empty(x.size(0)).uniform_(-self.magnitude, self.magnitude)
        return x.new_zeros(x.size(0)) + random.uniform(-self.magnitude, self.magnitude)

    def before_call(self, x):
        x_t = _draw_mask(x, self._def_draw, self.draw_x, p=self.p, batch=self.batch)
        y_t = _draw_mask(x, self._def_draw, self.draw_y, p=self.p, batch=self.batch)
        # The four corners of the [-1,1] square, and the same corners displaced by the drawn magnitudes
        orig_pts = torch.tensor([[-1,-1], [-1,1], [1,-1], [1,1]], dtype=x.dtype, device=x.device)
        self.orig_pts = orig_pts.unsqueeze(0).expand(x.size(0),4,2)
        targ_pts = stack([stack([-1-y_t, -1-x_t]), stack([-1+y_t, 1+x_t]),
                          stack([ 1+y_t, -1+x_t]), stack([ 1-y_t, 1-x_t])])
        self.targ_pts = targ_pts.permute(2,0,1)

    def __call__(self, x, invert=False):
        coeffs = find_coeffs(self.targ_pts, self.orig_pts) if invert else find_coeffs(self.orig_pts, self.targ_pts)
        return apply_perspective(x, coeffs)
@patch
@delegates(_WarpCoord.__init__)
def warp(x:TensorImage|TensorMask|TensorPoint|TensorBBox,
    size:int|tuple=None, # Output size, duplicated if one value is specified
    mode:str='bilinear', # PyTorch `F.grid_sample` interpolation applied to `x`
    pad_mode=PadMode.Reflection, # Padding applied to `x`
    align_corners:bool=True, # PyTorch `F.grid_sample` align_corners
    **kwargs
):
    "Warp the items of batch `x`; extra `kwargs` are forwarded to `_WarpCoord`"
    x0,mode,pad_mode = _get_default(x, mode, pad_mode)
    coord_tfm = _WarpCoord(**kwargs)
    coord_tfm.before_call(x0)
    return x.affine_coord(coord_tfm=coord_tfm, sz=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners)
x1 = tensor([[1.,0.,0.,0.,1.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.]])
x2 = tensor([[0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [1.,0.,0.,0.,1.]])
x3 = tensor([[1.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [1.,0.,0.,0.,0.]])
x4 = tensor([[0.,0.,0.,0.,1.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,1.]])
y = TensorImage(stack([x1,x2,x3,x4])[:,None])
y = y.warp(p=1., draw_x=[0.,0,-0.5,0.5], draw_y=[-0.5,0.5,0.,0.])
test_eq(y[0,0], tensor([[0.,1.,0.,1.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.]]))
test_eq(y[1,0], tensor([[0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,1.,0.,1.,0.]]))
test_eq(y[2,0], tensor([[0.,0.,0.,0.,0.], [1.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [1.,0.,0.,0.,0.], [0.,0.,0.,0.,0.]]))
test_eq(y[3,0], tensor([[0.,0.,0.,0.,0.], [0.,0.,0.,0.,1.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,1.], [0.,0.,0.,0.,0.]]))

x = torch.tensor([[[-1.,-1], [1,-1]], [[-1,1],[1,1]], [[-1,-1], [-1,1]], [[1,-1], [1,1]]])
y = TensorPoint(x).warp(p=1., draw_x=[0.,0,-0.5,0.5], draw_y=[-0.5,0.5,0.,0.])
test_eq(y, torch.tensor([[[-0.5,-1], [0.5,-1]], [[-0.5,1],[0.5,1]], [[-1,-0.5], [-1,0.5]], [[1,-0.5], [1,0.5]]]))
class Warp(AffineCoordTfm):
    "Apply perspective warping with `magnitude` and `p` on a batch of matrices"
    def __init__(self,
        magnitude:float=0.2, # The default warping magnitude
        p:float=0.5, # Probability of applying warp
        draw_x:float|MutableSequence|callable=None, # User defined warping magnitude in x
        draw_y:float|MutableSequence|callable=None, # User defined warping magnitude in y
        size:int|tuple=None, # Output size, duplicated if one value is specified
        mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
        pad_mode=PadMode.Reflection, # A `PadMode`
        batch:bool=False, # Apply identical warp to entire batch
        align_corners:bool=True # PyTorch `F.grid_sample` align_corners
    ):
        store_attr()
        coord_fs = _WarpCoord(magnitude=magnitude, p=p, draw_x=draw_x, draw_y=draw_y, batch=batch)
        super().__init__(coord_fs=coord_fs, size=size, mode=mode, pad_mode=pad_mode, align_corners=align_corners )
调用 @patch
的 warp
行为适用于 TensorImage
、TensorMask
、TensorPoint
和 TensorBBox
draw_x
和 draw_y
可以指定,如果您想自定义在应用变换时所选择的幅度(默认是在 -magnitude
和 magnitude
之间的随机浮点数)。每个值可以是一个浮点数、一个浮点数列表(该列表的长度应等于或大于批次的大小),或是一个返回浮点张量的可调用对象。
scales = [-0.4, -0.2, 0., 0.2, 0.4]
imgs=_batch_ex(5)
vert_warp = Warp(p=1., draw_y=scales, draw_x=0.)
horz_warp = Warp(p=1., draw_x=scales, draw_y=0.)
show_images( vert_warp(imgs) ,suptitle='Vertical warping', titles=[f'magnitude {i}' for i in scales])
show_images( horz_warp(imgs) ,suptitle='Horizontal warping', titles=[f'magnitude {i}' for i in scales])
x1 = tensor([[1.,0.,0.,0.,1.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.]])
x2 = tensor([[0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [1.,0.,0.,0.,1.]])
x3 = tensor([[1.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [1.,0.,0.,0.,0.]])
x4 = tensor([[0.,0.,0.,0.,1.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,1.]])
warp = Warp(p=1., draw_x=[0.,0,-0.5,0.5], draw_y=[-0.5,0.5,0.,0.])
y = warp(TensorImage(stack([x1,x2,x3,x4])[:,None]), split_idx=0)
test_eq(y[0,0], tensor([[0.,1.,0.,1.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.]]))
test_eq(y[1,0], tensor([[0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [0.,1.,0.,1.,0.]]))
test_eq(y[2,0], tensor([[0.,0.,0.,0.,0.], [1.,0.,0.,0.,0.], [0.,0.,0.,0.,0.], [1.,0.,0.,0.,0.], [0.,0.,0.,0.,0.]]))
test_eq(y[3,0], tensor([[0.,0.,0.,0.,0.], [0.,0.,0.,0.,1.], [0.,0.,0.,0.,0.], [0.,0.,0.,0.,1.], [0.,0.,0.,0.,0.]]))

x = torch.tensor([[[-1.,-1], [1,-1]], [[-1,1],[1,1]], [[-1,-1], [-1,1]], [[1,-1], [1,1]]])
y = warp(TensorPoint(x), split_idx=0)
test_eq(y, torch.tensor([[[-0.5,-1], [0.5,-1]], [[-0.5,1],[0.5,1]], [[-1,-0.5], [-1,0.5]], [[1,-0.5], [1,0.5]]]))
照明变换
光照变换是影响图像中光线表现的转换。这些变换不像之前的变换那样改变物体的位置,而是模拟光线在场景中的变化。 simclr 论文 对这些变换与其他变换进行了评估,以用于自监督图像分类的应用场景,值得注意的是,他们使用“颜色”和“颜色扭曲”来指代这些变换的组合。
@patch
def lighting(x: TensorImage, func):
    "Apply `func` to the logit of `x`, then map the result back with a sigmoid"
    return torch.sigmoid(func(logit(x)))
class SubTensorImage(TensorImage):
    "Trivial `TensorImage` subclass used to check that `lighting` preserves the subclass"
    pass

t=SubTensorImage(1)
assert isinstance(t.lighting(noop), SubTensorImage)
大多数灯光转换在“logit 空间”中效果更好,因为我们不想通过超出最大或最小亮度来使图像过曝。对 logit 进行 sigmoid 变换可以让我们回到“线性空间”。
x=TensorImage(torch.tensor([.01* i for i in range(0,101)]))
f_lin= lambda x:(2*(x-0.5)+0.5).clamp(0,1) #blue line
f_log= lambda x:2*x #red line
plt.plot(x,f_lin(x),'b',x,x.lighting(f_log),'r');
上面的图显示了在线性空间和 logit 空间中进行对比度变换的结果。注意到蓝色线性图必须被截断(clamp),因此我们失去了 0.0 与 0.2 之间的相对大小信息。而在红色图中,值呈曲线形状,因此我们保留了这种相对信息。
首先,我们创建一个通用的 SpaceTfm
。这使我们可以将多个变换组合在一起,从而我们只需在进行多个变换之前转换一次到某个空间。space_fn
必须从 rgb 转换到某个空间,应用一个函数,然后再转换回 rgb。fs
应该是类似列表的,包含将被组合在一起的函数。
class SpaceTfm(RandTransform):
    "Apply `fs` to a `TensorImage` inside the space that `space_fn` converts to and from"
    order = 40
    def __init__(self,
        fs:callable|MutableSequence, # Transformation functions applying in a space
        space_fn:callable, # Function converting rgb to a space and back to rgb after applying `fs`
        **kwargs
    ):
        super().__init__(**kwargs)
        self.space_fn=space_fn
        self.fs=L(fs)

    def before_call(self,
        b,
        split_idx:int, # Index of the train/valid dataset
    ):
        "Always enable the transform and let every function in `fs` draw its parameters"
        self.do = True
        # Unwrap nested tuples so each function sees the first tensor of the batch
        while isinstance(b, tuple): b = b[0]
        for t in self.fs: t.before_call(b)

    def compose(self,
        tfm:callable # Transformation function to compose
    ):
        "Compose `self` with another `LightingTransform`"
        self.fs += tfm.fs

    def encodes(self,x:TensorImage): return self.space_fn(x,partial(compose_tfms, tfms=self.fs))
LightingTfm
是一个 SpaceTfm
,它使用 TensorImage.lighting
转换到 logit 空间。使用此方法可以在图像变得非常暗或非常亮时限制细节的丢失。
class LightingTfm(SpaceTfm):
    "Apply `fs` to the logits"
    order = 40
    def __init__(self,
        fs:callable|MutableSequence, # Transformation functions applying in logit space
        **kwargs
    ):
        super().__init__(fs, TensorImage.lighting, **kwargs)
亮度是指场景中的光线量。它可以是零,即图像完全黑色,或者是完全白色的值。这在你预计数据集中有过曝或欠曝图像时可能特别有用。
class _BrightnessLogit():
    "Brightness change: `before_call` draws one value per item, `__call__` adds its logit in place"
    def __init__(self, max_lighting=0.2, p=0.75, draw=None, batch=False): store_attr()

    def _def_draw(self, x):
        # Values are drawn around 0.5, the neutral value (its logit is 0)
        if not self.batch: return x.new_empty(x.size(0)).uniform_(0.5*(1-self.max_lighting), 0.5*(1+self.max_lighting))
        return x.new_zeros(x.size(0)) + random.uniform(0.5*(1-self.max_lighting), 0.5*(1+self.max_lighting))

    def before_call(self, x):
        self.change = _draw_mask(x, self._def_draw, draw=self.draw, p=self.p, neutral=0.5, batch=self.batch)

    def __call__(self, x): return x.add_(logit(self.change[:,None,None,None]))
@patch
@delegates(_BrightnessLogit.__init__)
def brightness(x: TensorImage, **kwargs):
    "Change the brightness of `x`; `kwargs` are forwarded to `_BrightnessLogit`"
    func = _BrightnessLogit(**kwargs)
    func.before_call(x)
    return x.lighting(func)
class Brightness(LightingTfm):
    def __init__(self,
        max_lighting:float=0.2, # Maximum scale of changing brightness
        p:float=0.75, # Probability of applying transformation
        draw:float|MutableSequence|callable=None, # User defined behavior of batch transformation
        batch=False # Apply identical brightness to entire batch
    ):
        "Apply change in brightness of `max_lighting` to batch of images with probability `p`."
        store_attr()
        super().__init__(_BrightnessLogit(max_lighting, p, draw, batch))
调用 @patch
的 Brightness
行为用于 TensorImage
draw
可以指定,如果您想自定义在应用变换时选择的幅度(默认值是介于 0.5*(1-max_lighting)
和 0.5*(1+max_lighting)
之间的随机浮点数)。每个值可以是一个浮点数,一个浮点数列表(此时列表长度应大于或等于批处理的大小),或者一个返回浮点张量的可调用对象。
scales = [0.1, 0.3, 0.5, 0.7, 0.9]
y = _batch_ex(5).brightness(draw=scales, p=1.)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
x = torch.randn(5, 3, 4, 4)
bright = Brightness(draw=scales, p=1.)
print('***', bright.space_fn)
y = bright(TensorImage(x), split_idx=0)
y1 = torch.sigmoid(logit(x) + logit(tensor(scales))[:,None,None,None])
test_close(y, y1)

# Non-image tensor types must pass through unchanged
test_eq(bright(TensorMask(x), split_idx=0), x)
test_eq(bright(TensorPoint(x), split_idx=0), x)
test_eq(bright(TensorBBox(x), split_idx=0), x)
*** <function TensorImage.lighting at 0x14bfc0d30>
对比度将像素推向最大值或最小值。对比度的最小值是一个纯灰色图像。举个例子,拍摄一个在黑暗房间中明亮光源的照片。你的眼睛应该能够看到房间中的一些细节,但拍摄出来的照片应该具有更高的对比度,背景中的所有细节都因为黑暗而缺失。这是这个变换可以帮助模拟的一个例子。
class _ContrastLogit():
    "Contrast change: `before_call` draws one factor per item, `__call__` multiplies by it in place"
    def __init__(self, max_lighting=0.2, p=0.75, draw=None, batch=False): store_attr()

    def _def_draw(self, x):
        # Draw uniformly in log space so factors are symmetric around 1
        if not self.batch: res = x.new_empty(x.size(0)).uniform_(math.log(1-self.max_lighting), -math.log(1-self.max_lighting))
        else: res = x.new_zeros(x.size(0)) + random.uniform(math.log(1-self.max_lighting), -math.log(1-self.max_lighting))
        return torch.exp(res)

    def before_call(self, x):
        self.change = _draw_mask(x, self._def_draw, draw=self.draw, p=self.p, neutral=1., batch=self.batch)

    def __call__(self, x): return x.mul_(self.change[:,None,None,None])
@patch
@delegates(_ContrastLogit.__init__)
def contrast(x: TensorImage, **kwargs):
    "Change the contrast of `x`; `kwargs` are forwarded to `_ContrastLogit`"
    func = _ContrastLogit(**kwargs)
    func.before_call(x)
    return x.lighting(func)
class Contrast(LightingTfm):
    "Apply change in contrast of `max_lighting` to batch of images with probability `p`."
    def __init__(self,
        max_lighting=0.2, # Maximum scale of changing contrast
        p=0.75, # Probability of applying transformation
        draw:float|MutableSequence|callable=None, # User defined behavior of batch transformation
        batch=False # Apply identical contrast to entire batch
    ):
        store_attr()
        super().__init__(_ContrastLogit(max_lighting, p, draw, batch))
调用 @patch
的 TensorImage
的 contrast
行为
draw
可以被指定用来定制在应用变换时选择的幅度(默认为在 (1-max_lighting)
和 1/(1-max_lighting)
之间随机取的浮点数)。每个值可以是一个浮点数,一个浮点数列表(此时列表长度应大于或等于批次大小),或者是一个返回浮点张量的可调用对象。
scales = [0.65, 0.8, 1., 1.25, 1.55]
y = _batch_ex(5).contrast(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
x = torch.randn(5, 3, 4, 4)
cont = Contrast(p=1., draw=scales)
y = cont(TensorImage(x), split_idx=0)
y1 = torch.sigmoid(logit(x) * tensor(scales)[:,None,None,None])
test_close(y, y1)

# Non-image tensor types must pass through unchanged
test_eq(cont(TensorMask(x), split_idx=0), x)
test_eq(cont(TensorPoint(x), split_idx=0), x)
test_eq(cont(TensorBBox(x), split_idx=0), x)
def grayscale(x):
    "Tensor to grayscale tensor. Uses the ITU-R 601-2 luma transform. "
    # Weight the channels along dim 1, sum them, and keep a singleton channel dim
    return (x*torch.tensor([0.2989,0.5870,0.1140],device=x.device)[...,None,None]).sum(1)[:,None]
以上只是将图像转换为灰度的一种方法。我们选择这种方法是因为它速度较快。请注意,每个通道的权重之和为1。
# Sanity check: the three luma weights used by `grayscale` sum to 1.
f'{sum([0.2989,0.5870,0.1140]):.3f}'
'1.000'
class _SaturationLogit():
    "Saturation change: `before_call` draws one factor per item, `__call__` blends `x` with its grayscale in place"
    def __init__(self, max_lighting=0.2, p=0.75, draw=None, batch=False): store_attr()

    def _def_draw(self, x):
        # Draw uniformly in log space so factors are symmetric around 1
        if not self.batch: res = x.new_empty(x.size(0)).uniform_(math.log(1-self.max_lighting), -math.log(1-self.max_lighting))
        else: res = x.new_zeros(x.size(0)) + random.uniform(math.log(1-self.max_lighting), -math.log(1-self.max_lighting))
        return torch.exp(res)

    def before_call(self, x):
        self.change = _draw_mask(x, self._def_draw, draw=self.draw, p=self.p, neutral=1., batch=self.batch)

    def __call__(self, x):
        #interpolate between grayscale and original in-place
        gs = grayscale(x)
        gs.mul_(1-self.change[:,None,None,None])
        x.mul_(self.change[:,None,None,None])
        return x.add_(gs)
@patch
@delegates(_SaturationLogit.__init__)
def saturation(x: TensorImage, **kwargs):
    "Change the saturation of `x`; `kwargs` are forwarded to `_SaturationLogit`"
    func = _SaturationLogit(**kwargs)
    func.before_call(x)
    return x.lighting(func)
class Saturation(LightingTfm):
    "Apply change in saturation of `max_lighting` to batch of images with probability `p`."
    # Ref: https://pytorch.org/docs/stable/torchvision/transforms.html#torchvision.transforms.functional.adjust_saturation
    def __init__(self,
        max_lighting:float=0.2, # Maximum scale of changing saturation
        p:float=0.75, # Probability of applying transformation
        draw:float|MutableSequence|callable=None, # User defined behavior of batch transformation
        batch:bool=False # Apply identical saturation to entire batch
    ):
        store_attr()
        super().__init__(_SaturationLogit(max_lighting, p, draw, batch))
调用 @patch
的 saturation
行为用于 TensorImage
scales = [0., 0.5, 1., 1.5, 2.0]
y = _batch_ex(5).saturation(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
饱和度控制图像中色彩的数量,但不影响图像的亮度或黑暗程度。它对中性色(如白色、灰色和黑色)没有影响。在零饱和度时,实际上会得到一幅灰度图像。将饱和度推高到超过1时,会使更多中性色呈现出任何潜在的色彩。
x = torch.randn(5, 3, 4, 4)
sat = Saturation(p=1., draw=scales)
y = sat(TensorImage(x), split_idx=0)
y1 = logit(x) * tensor(scales)[:,None,None,None]
y1 += grayscale(logit(x)) * (1-tensor(scales)[:,None,None,None])
y1 = torch.sigmoid(y1)
test_close(y, y1)

# Non-image tensor types must pass through unchanged
test_eq(sat(TensorMask(x), split_idx=0), x)
test_eq(sat(TensorPoint(x), split_idx=0), x)
test_eq(sat(TensorBBox(x), split_idx=0), x)
rgb2hsv
和 hsv2rgb
是用于在 RGB 与 HSV 空间之间相互转换的工具。HSV 代表色相(Hue)、饱和度(Saturation)和明度(Value)。这使我们能够更容易地执行某些变换。
# Check what `torch.max` returns when called on a `TensorBase` subclass
torch.max(tensor([1]).as_subclass(TensorBase), dim=0)
torch.return_types.max(
values=TensorBase(1),
indices=TensorBase(0))
def rgb2hsv(
    img:Tensor # Batch of images `Tensor` in RGB
):
    "Converts a RGB image to an HSV image. Note: Will not work on logit space images."
    r, g, b = img.unbind(1)
    # temp commented out due to https://github.com/pytorch/pytorch/issues/47069
    # maxc = torch.max(img, dim=1).values
    # minc = torch.min(img, dim=1).values
    maxc = torch.max(img, dim=1)[0]
    minc = torch.min(img, dim=1)[0]
    eqc = maxc == minc

    cr = maxc - minc
    # Where max == min (gray pixels) the divisors are replaced by 1 to avoid dividing by zero
    s = cr / torch.where(eqc, maxc.new_ones(()), maxc)
    cr_divisor = torch.where(eqc, maxc.new_ones(()), cr)
    rc = (maxc - r) / cr_divisor
    gc = (maxc - g) / cr_divisor
    bc = (maxc - b) / cr_divisor

    # Exactly one of the three masks is set per pixel, depending on which channel is the max
    hr = (maxc == r) * (bc - gc)
    hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc)
    hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc)
    h = (hr + hg + hb)
    h = torch.fmod((h / 6.0 + 1.0), 1.0)

    return torch.stack((h, s, maxc),dim=1)
def hsv2rgb(
    img:Tensor, # Batch of images `Tensor` in HSV, channels on dim 1
):
    "Converts a HSV image to an RGB image."
    h, s, v = img.unbind(1)
    i = torch.floor(h * 6.0)   # hue sector index
    f = (h * 6.0) - i          # fractional position inside the sector
    i = i.to(dtype=torch.int32)

    p = torch.clamp((v * (1.0 - s)), 0.0, 1.0)
    q = torch.clamp((v * (1.0 - s * f)), 0.0, 1.0)
    t = torch.clamp((v * (1.0 - s * (1.0 - f))), 0.0, 1.0)
    i = i % 6

    # One-hot over the six sectors, placed on dim 1 so it lines up with `a4` below.
    mask = i[:,None] == torch.arange(6,device=i.device)[:, None, None][None]

    # a1/a2/a3 hold the R/G/B candidates for each of the six sectors.
    a1 = torch.stack((v, q, p, p, t, v),dim=1)
    a2 = torch.stack((t, v, v, q, p, p),dim=1)
    a3 = torch.stack((p, p, t, v, v, q),dim=1)
    a4 = torch.stack((a1, a2, a3),dim=1)

    # Select, per pixel, the candidate of the active sector for every output channel.
    return torch.einsum("nijk, nxijk -> nxjk", mask.to(dtype=img.dtype), a4)
与在logit空间中进行的lighting
非常相似,hsv变换是在hsv空间中进行的。我们可以组合在hsv空间中进行的任何变换。
@patch
def hsv(x: TensorImage, func):
    "Convert `x` to HSV, apply `func` there, and convert the result back to RGB"
    in_hsv = rgb2hsv(x)
    changed = func(in_hsv)
    return TensorImage(hsv2rgb(changed))
class HSVTfm(SpaceTfm):
    "Apply `fs` to the images in HSV space"
    def __init__(self, fs, **kwargs):
        # `TensorImage.hsv` is the space-conversion wrapper handed to `SpaceTfm`.
        super().__init__(fs, TensorImage.hsv, **kwargs)
调用 @patch
装饰的 TensorImage
的 hsv
行为
# Plot hue (rows) against saturation (columns) at five decreasing values of V.
fig,axs=plt.subplots(figsize=(20, 4),ncols=5)
axs[0].set_ylabel('Hue')
for ax in axs:
    ax.set_xlabel('Saturation')
    ax.set_yticklabels([])
    ax.set_xticklabels([])

hsvs=torch.stack([torch.arange(0,2.1,0.01)[:,None].repeat(1,210),
                 torch.arange(0,1.05,0.005)[None].repeat(210,1),
                 torch.ones([210,210])])[None]
for ax,i in zip(axs,range(0,5)):
    # V is scaled in place, so panel i shows V = 0.8**i.
    if i>0: hsvs[:,2].mul_(0.80)
    ax.set_title('V='+'%.1f' %0.8**i)
    ax.imshow(hsv2rgb(hsvs)[0].permute(1,2,0))
对于色相变换,我们使用HSV空间而不是logit空间。HSV代表色相(Hue)、饱和度(Saturation)和明度(Value)。HSV空间中的色相只是在彩虹的各种颜色之间循环。注意色相没有最大值,因为颜色会循环重复。
上面是一些在不同明度(V)下的色相(H)和饱和度(S)的示例。需要注意的是,HSV空间中的一个特性是,当饱和度最低时,V控制你获得的颜色。
class _Hue():
    "Callable that shifts the hue channel of a batch of HSV images"
    def __init__(self, max_hue=0.1, p=0.75, draw=None, batch=False): store_attr()

    def _def_draw(self, x):
        "Default draw: one factor per item, `exp(U(log(1-max_hue), -log(1-max_hue)))`"
        if not self.batch: res = x.new_empty(x.size(0)).uniform_(math.log(1-self.max_hue), -math.log(1-self.max_hue))
        # With `batch=True` a single random value is shared by every item.
        else: res = x.new_zeros(x.size(0)) + random.uniform(math.log(1-self.max_hue), -math.log(1-self.max_hue))
        return torch.exp(res)

    def before_call(self, x):
        "Draw and store the per-item hue change used by the next `__call__`"
        # NOTE(review): `_draw_mask` is defined elsewhere; presumably it applies `p` and returns `neutral` for skipped items -- confirm.
        self.change = _draw_mask(x, self._def_draw, draw=self.draw, p=self.p, neutral=0., batch=self.batch)

    def __call__(self, x):
        h,s,v = x.unbind(1)
        h += self.change[:,None,None]
        h = h % 1.0  # hue is cyclic, wrap back into [0, 1)
        return x.set_(torch.stack((h, s, v),dim=1))
@patch
@delegates(_Hue.__init__)
def hue(x: TensorImage, **kwargs):
    "Apply a `_Hue` change (built from `kwargs`) to `x` in HSV space"
    func = _Hue(**kwargs)
    # The change must be drawn before `func` is applied to the HSV image.
    func.before_call(x)
    return TensorImage(x.hsv(func))
class Hue(HSVTfm):
    "Apply change in hue of `max_hue` to batch of images with probability `p`."
    # Ref: https://pytorch.org/docs/stable/torchvision/transforms.html#torchvision.transforms.functional.adjust_hue
    def __init__(self,
        max_hue:float=0.1, # Maximum scale of changing hue
        p:float=0.75, # Probability of applying the transformation
        draw:float|MutableSequence|callable=None, # User defined behavior of batch transformation
        batch:bool=False # Apply identical hue to entire batch
    ):
        super().__init__(_Hue(max_hue, p, draw, batch))
调用 @patch
的 hue
行为用于 TensorImage
scales = [0.5, 0.75, 1., 1.5, 1.75]
y = _batch_ex(len(scales)).hue(p=1., draw=scales)
fig,axs = plt.subplots(1,len(scales), figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Hue wraps modulo 1, so draws that differ by exactly 1 give the same image,
# and a draw of 1 leaves the image unchanged.
test_close(y[0:2], y[3:5])
test_close(y[2],_batch_ex(1))

x = torch.randn(5, 3, 4, 4)
hue = Hue(p=1., draw=scales)
test_close(hue(TensorImage(x), split_idx=0),TensorImage(x).hue(p=1.,draw=scales))
# Non-image tensor types must pass through untouched.
test_eq(hue(TensorMask(x), split_idx=0), x)
test_eq(hue(TensorPoint(x), split_idx=0), x)
test_eq(hue(TensorBBox(x), split_idx=0), x)
随机擦除
随机擦除数据增强。这个由罗斯·怀特曼设计的变体,可以在对图像张量进行归一化后,应用于一个批次或单个图像。
def cutout_gaussian(
    x:Tensor, # Input image
    areas:list # List of areas to cutout. Order rl,rh,cl,ch
):
    "Replace all `areas` in `x` with N(0,1) noise"
    # The slices are views of `x`, so `normal_()` overwrites `x` in place.
    for rl,rh,cl,ch in areas: x[..., rl:rh, cl:ch].normal_()
    return x
由于这应该在归一化之后应用,我们将定义一个助手函数在归一化内部应用一个函数。
def norm_apply_denorm(
    x:Tensor, # Input image
    f:callable, # Function to apply
    nrm:callable # Normalization transformation
):
    "Normalize `x` with `nrm`, then apply `f`, then denormalize"
    # Work on a clone so an in-place `f` cannot modify the caller's tensor.
    y = f(nrm(x.clone()))
    return nrm.decode(y).clamp(0,1)
nrm = Normalize.from_stats(*imagenet_stats, cuda=False)
f = partial(cutout_gaussian, areas=[(100,200,100,200),(200,300,200,300)])
show_image(norm_apply_denorm(timg, f, nrm)[0]);
def _slice(area, sz):
    "Return a random `(start, stop)` span of length `round(sqrt(area))` starting inside `[0, sz)`"
    bound = int(round(math.sqrt(area)))
    # When the span is longer than `sz` the start is pinned to 0 (stop may exceed `sz`).
    loc = random.randint(0, max(sz-bound, 0))
    return loc,loc+bound
class RandomErasing(RandTransform):
    "Randomly selects a rectangle region in an image and randomizes its pixels."
    order = 100 # After Normalize
    def __init__(self,
        p:float=0.5, # Probability of applying Random Erasing
        sl:float=0., # Minimum proportion of erased area
        sh:float=0.3, # Maximum proportion of erased area
        min_aspect:float=0.3, # Minimum aspect ratio of erased area
        max_count:int=1 # Maximum number of erasing blocks per image, area per box is scaled by count
    ):
        store_attr()
        super().__init__(p=p)
        # Aspect ratios are sampled uniformly in log space, symmetric around 1.
        self.log_ratio = (math.log(min_aspect), math.log(1/min_aspect))

    def _bounds(self, area, img_h, img_w):
        "Sample one `(rl, rh, cl, ch)` rectangle covering a random share of `area`"
        r_area = random.uniform(self.sl,self.sh) * area
        aspect = math.exp(random.uniform(*self.log_ratio))
        return _slice(r_area*aspect, img_h) + _slice(r_area/aspect, img_w)

    def encodes(self,x:TensorImage):
        count = random.randint(1, self.max_count)
        _,img_h,img_w = x.shape[-3:]
        # The image area is split evenly between the `count` boxes.
        area = img_h*img_w/count
        areas = [self._bounds(area, img_h, img_w) for _ in range(count)]
        return cutout_gaussian(x, areas)
tfm = RandomErasing(p=1., max_count=6)

_,axs = subplots(2,3, figsize=(12,6))
f = partial(tfm, split_idx=0)
for i,ax in enumerate(axs.flatten()): show_image(norm_apply_denorm(timg, f, nrm)[0], ctx=ax)
tfm = RandomErasing(p=1., max_count=6)

_,axs = subplots(2,3, figsize=(12,6))
f = partial(tfm, split_idx=0)
for i,ax in enumerate(axs.flatten()): show_image(norm_apply_denorm(timg, f, nrm)[0], ctx=ax)
tfm = RandomErasing(p=1., max_count=6)

_,axs = subplots(2,3, figsize=(12,6))
# Same transform called with split_idx=1 instead of the training index 0 used above.
f = partial(tfm, split_idx=1)
for i,ax in enumerate(axs.flatten()): show_image(norm_apply_denorm(timg, f, nrm)[0], ctx=ax)
一起完成
def _compose_same_tfms(tfms):
    "Fold `tfms` into the first one with `compose`; return `None` if `tfms` is empty"
    tfms = L(tfms)
    if len(tfms) == 0: return None
    res = tfms[0]
    # `compose` is called for its effect on `res`; its return value is not used.
    for tfm in tfms[1:]: res.compose(tfm)
    return res
def setup_aug_tfms(tfms):
    "Go through `tfms` and combines together affine/coord or lighting transforms"
    aff_tfms = [tfm for tfm in tfms if isinstance(tfm, AffineCoordTfm)]
    lig_tfms = [tfm for tfm in tfms if isinstance(tfm, LightingTfm)]
    # Build the exclusion list once instead of once per element.
    grouped = aff_tfms+lig_tfms
    others = [tfm for tfm in tfms if tfm not in grouped]
    lig_tfm = _compose_same_tfms(lig_tfms)
    aff_tfm = _compose_same_tfms(aff_tfms)
    # Output order: composed affine/coord, then composed lighting, then everything else.
    res = [aff_tfm] if aff_tfm is not None else []
    if lig_tfm is not None: res.append(lig_tfm)
    return res + others
# Affine only
tfms = [Rotate(draw=10., p=1), Zoom(draw=1.1, draw_x=0.5, draw_y=0.5, p=1.)]
comp = setup_aug_tfms([Rotate(draw=10., p=1), Zoom(draw=1.1, draw_x=0.5, draw_y=0.5, p=1.)])
test_eq(len(comp), 1)
x = torch.randn(4,3,5,5)
test_close(comp[0]._get_affine_mat(x)[...,:2],tfms[0]._get_affine_mat(x)[...,:2] @ tfms[1]._get_affine_mat(x)[...,:2])
# We can't test that the output of comp or the composition of tfms on x is the same because it's not (1 interpol vs 2 sp)
# Affine + lighting
tfms = [Rotate(), Zoom(), Warp(), Brightness(), Flip(), Contrast()]
comp = setup_aug_tfms(tfms)
aff_tfm,lig_tfm = comp
test_eq(len(aff_tfm.aff_fs+aff_tfm.coord_fs+comp[1].fs), 6)
test_eq(len(aff_tfm.aff_fs), 3)
test_eq(len(aff_tfm.coord_fs), 1)
test_eq(len(lig_tfm.fs), 2)
def aug_transforms(
    mult:float=1.0, # Multiplier applied to `max_rotate`, `max_lighting` and `max_warp`
    do_flip:bool=True, # Random flipping
    flip_vert:bool=False, # Flip vertically
    max_rotate:float=10., # Maximum degree of rotation
    min_zoom:float=1., # Minimum zoom
    max_zoom:float=1.1, # Maximum zoom
    max_lighting:float=0.2, # Maximum scale of changing brightness
    max_warp:float=0.2, # Maximum magnitude of warp
    p_affine:float=0.75, # Probability of applying affine transformation
    p_lighting:float=0.75, # Probability of changing brightness and contrast
    xtra_tfms:list=None, # Custom transformations
    size:int|tuple=None, # Output size, duplicated if one value is specified
    mode:str='bilinear', # PyTorch `F.grid_sample` interpolation
    pad_mode=PadMode.Reflection, # A `PadMode`
    align_corners=True, # PyTorch `F.grid_sample` align_corners
    batch=False, # Apply identical transformation to entire batch
    min_scale=1. # Minimum scale of the crop, in relation to image area
):
    "Utility func to easily create a list of flip, rotate, zoom, warp, lighting transforms."
    # When a random resized crop is requested (`min_scale != 1`) it receives `size`,
    # so the affine transforms are created with `size=None`.
    res,tkw = [],dict(size=size if min_scale==1. else None, mode=mode, pad_mode=pad_mode, batch=batch, align_corners=align_corners)
    max_rotate,max_lighting,max_warp = array([max_rotate,max_lighting,max_warp])*mult
    if do_flip: res.append(Dihedral(p=0.5, **tkw) if flip_vert else Flip(p=0.5, **tkw))
    if max_warp: res.append(Warp(magnitude=max_warp, p=p_affine, **tkw))
    if max_rotate: res.append(Rotate(max_deg=max_rotate, p=p_affine, **tkw))
    if min_zoom<1 or max_zoom>1: res.append(Zoom(min_zoom=min_zoom, max_zoom=max_zoom, p=p_affine, **tkw))
    if max_lighting:
        res.append(Brightness(max_lighting=max_lighting, p=p_lighting, batch=batch))
        res.append(Contrast(max_lighting=max_lighting, p=p_lighting, batch=batch))
    if min_scale!=1.: xtra_tfms = RandomResizedCropGPU(size, min_scale=min_scale, ratio=(1,1)) + L(xtra_tfms)
    return setup_aug_tfms(res + L(xtra_tfms))
当 do_flip=True
时,会添加随机翻转(如果 flip_vert=True
则改为二面体(dihedral)变换),概率为 p=0.5
。我们以概率 p_affine
应用最大角度为 max_rotate
度的随机旋转、缩放比例介于 min_zoom
和 max_zoom
之间的随机缩放,以及幅度为 max_warp
的透视扭曲。我们以概率 p_lighting
应用幅度为 max_lighting
的亮度和对比度变化。可以添加自定义的 xtra_tfms
。size
、mode
和 pad_mode
将用于插值。max_rotate,max_lighting,max_warp
将乘以 mult
,以便您可以更轻松地通过单个参数增加或减少增强效果。
tfms = aug_transforms(pad_mode='zeros', mult=2, min_scale=0.5)
y = _batch_ex(9)
for t in tfms: y = t(y, split_idx=0)
_,axs = plt.subplots(1,3, figsize=(12,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)
# Same pipeline with `batch=True`: one draw is shared by the whole batch.
tfms = aug_transforms(pad_mode='zeros', mult=2, batch=True)
y = _batch_ex(9)
for t in tfms: y = t(y, split_idx=0)
_,axs = plt.subplots(1,3, figsize=(12,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)
集成测试
分割
camvid = untar_data(URLs.CAMVID_TINY)
fns = get_image_files(camvid/'images')
cam_fn = fns[0]
mask_fn = camvid/'labels'/f'{cam_fn.stem}_P{cam_fn.suffix}'
# Every item maps to the same mask file, whatever `fn` is passed.
def _cam_lbl(fn): return mask_fn
cam_dsrc = Datasets([cam_fn]*10, [PILImage.create, [_cam_lbl, PILMask.create]])
cam_tdl = TfmdDL(cam_dsrc.train, after_item=ToTensor(),
                 after_batch=[IntToFloatTensor(), *aug_transforms()], bs=9)
cam_tdl.show_batch(max_n=9, vmin=1, vmax=30)
点目标
mnist = untar_data(URLs.MNIST_TINY)
mnist_fn = 'images/mnist3.png'
pnts = np.array([[0,0], [0,35], [28,0], [28,35], [9, 17]])
# NOTE(review): the `->None` annotation is kept as in the original; it looks deliberate
# (fastcore treats a `None` return annotation specially) -- confirm before changing.
def _pnt_lbl(fn)->None: return TensorPoint.create(pnts)
pnt_dsrc = Datasets([mnist_fn]*10, [[PILImage.create, Resize((35,28))], _pnt_lbl])
pnt_tdl = TfmdDL(pnt_dsrc.train, after_item=[PointScaler(), ToTensor()],
                 after_batch=[IntToFloatTensor(), *aug_transforms(max_warp=0)], bs=9)
pnt_tdl.show_batch(max_n=9)
边界框
coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
idx=2
coco_fn,bbox = coco/'train'/images[idx],lbl_bbox[idx]

# Both labellers ignore their argument and return the annotation of image `idx`.
def _coco_bb(x): return TensorBBox.create(bbox[0])
def _coco_lbl(x): return bbox[1]
coco_dsrc = Datasets([coco_fn]*10, [PILImage.create, [_coco_bb], [_coco_lbl, MultiCategorize(add_na=True)]], n_inp=1)
coco_tdl = TfmdDL(coco_dsrc, bs=9, after_item=[BBoxLabeler(), PointScaler(), ToTensor(), Resize(256)],
                  after_batch=[IntToFloatTensor(), *aug_transforms()])

coco_tdl.show_batch(max_n=9)
导出 -
from nbdev import nbdev_export
nbdev_export()