! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
#| default_exp vision.core
#| default_cls_lvl 3
from __future__ import annotations
from fastai.torch_basics import *
from fastai.data.all import *
from PIL import Image
# Newer Pillow releases expose the resampling filters on `Image.Resampling`;
# fall back to the module-level constants when that attribute does not exist.
try: BILINEAR,NEAREST = Image.Resampling.BILINEAR,Image.Resampling.NEAREST
except AttributeError: from PIL.Image import BILINEAR,NEAREST
# NOTE(review): `_all_` looks like the nbdev marker for extra names to export — confirm.
_all_ = ['BILINEAR','NEAREST']
from fastai.data.external import *
from nbdev.showdoc import *
_all_ = ['Image','ToTensor']
# NOTE(review): the decorator line was missing from this extract; `@patch` is restored to match
# the other `Image.Image` extensions in this file (the receiver is named by the annotation on `x`) — confirm.
@patch
def __repr__(x:Image.Image):
    "Short repr giving the image's class path, mode and pixel size"
    return "<%s.%s image mode=%s size=%dx%d>" % (x.__class__.__module__, x.__class__.__name__, x.mode, x.size[0], x.size[1])
# Each name is paired with the literal that preceded it in the scrambled extract
# (read literally, `imagenet_stats` would have received the CIFAR values).
# NOTE(review): presumably per-channel (mean, std) normalisation statistics — confirm.
imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
cifar_stats = ([0.491, 0.482, 0.447], [0.247, 0.243, 0.261])
mnist_stats = ([0.131], [0.308])
# Small 30x20 sample image used by the checks that follow
im = Image.open(TEST_IMAGE).resize((30,20))
# Replace `Image.Image.size` once with a version returning a `fastuple`.
# The `_patched` flag stops a second run from wrapping the already-wrapped getter.
if not hasattr(Image,'_patched'):
    _old_sz = Image.Image.size.fget  # original getter, called by the replacement below
    @patch(as_prop=True)
    def size(x:Image.Image): return fastuple(_old_sz(x))
    Image._patched = True
# `@patch(as_prop=True)` restored: the check below reads `im.n_px` as an attribute.
@patch(as_prop=True)
def n_px(x: Image.Image):
    "Product of the two entries of `x.size`"
    return x.size[0] * x.size[1]

test_eq(im.n_px, 30*20)
# `@patch(as_prop=True)` restored: the check below reads `im.shape` as an attribute.
@patch(as_prop=True)
def shape(x: Image.Image):
    "`x.size` with its two entries swapped"
    return x.size[1],x.size[0]

test_eq(im.shape, (20,30))
# `@patch(as_prop=True)` restored: the check below reads `im.aspect` as an attribute.
@patch(as_prop=True)
def aspect(x: Image.Image):
    "First entry of `x.size` divided by the second"
    return x.size[0]/x.size[1]

test_eq(im.aspect, 30/20)
# `@patch` restored: the file calls this as a method (`im.reshape(12,10)`).
@patch
def reshape(x: Image.Image, h, w, resample=0):
    "`resize` `x` to `(w,h)`"
    # Arguments arrive as (h, w); they are handed to `resize` as the tuple (w, h)
    return x.resize((w,h), resample=resample)
Image.reshape (x:PIL.Image.Image, h, w, resample=0)
`resize` `x` to `(w,h)`
test_eq(im.reshape(12,10).shape, (12,10))
# `@patch` restored to match the rendered signature `Image.to_bytes_format` further down the file.
@patch
def to_bytes_format(im:Image.Image, format='png'):
    "Convert to bytes, default to PNG format"
    arr = io.BytesIO()           # in-memory buffer that receives the encoded image
    im.save(arr, format=format)
    return arr.getvalue()
Image.to_bytes_format (im:PIL.Image.Image, format='png')
Convert to bytes, default to PNG format
# `@patch` restored to match the rendered signature `Image.to_thumb` further down the file.
@patch
def to_thumb(self:Image.Image, h, w=None):
    "Same as `thumbnail`, but uses a copy"
    if w is None: w=h            # a single value is used for both dimensions
    im = self.copy()             # `thumbnail` is applied to the copy, not to `self`
    im.thumbnail((w,h))
    return im
Image.to_thumb (h, w=None)
Same as `thumbnail`, but uses a copy
# `@patch` restored: the checks in this file call `im.resize_max(...)` as a method.
@patch
def resize_max(x: Image.Image, resample=0, max_px=None, max_h=None, max_w=None):
    "`resize` `x` to `max_px`, or `max_h`, or `max_w`"
    h,w = x.shape
    # Limits are applied in this order: pixel count, then height, then width.
    # Each step rescales both dimensions by the same factor.
    if max_px and x.n_px>max_px: h,w = fastuple(h,w).mul(math.sqrt(max_px/x.n_px))
    if max_h and h>max_h: h,w = (max_h    ,max_h*w/h)
    if max_w and w>max_w: h,w = (max_w*h/w,max_w    )
    return x.reshape(round(h), round(w), resample=resample)
# Keyword prefixes are moved back to their own calls (the extract had shifted them one line down,
# which produced a duplicate `max_h` keyword).
test_eq(im.resize_max(max_px=20*30).shape, (20,30))
test_eq(im.resize_max(max_px=300).n_px, 294)
test_eq(im.resize_max(max_px=500, max_h=10, max_w=20).shape, (10,15))
test_eq(im.resize_max(max_h=14, max_w=15).shape, (10,15))
test_eq(im.resize_max(max_px=300, max_h=10, max_w=25).shape, (10,15))
Image.resize_max (x:PIL.Image.Image, resample=0, max_px=None, max_h=None, max_w=None)
`resize` `x` to `max_px`, or `max_h`, or `max_w`
def to_image(x):
    "Convert a tensor or array to a PIL int8 Image"
    if isinstance(x,Image.Image): return x
    # Tensors are moved to channels-last before conversion
    if isinstance(x,Tensor): x = to_np(x.permute((1,2,0)))
    if x.dtype==np.float32: x = (x*255).astype(np.uint8)
    # Pick the mode from the channel axis (last, after the permute above).
    # The previous `x.shape[0]` inspected the first axis, which is the height at this point.
    return Image.fromarray(x, mode=['RGB','CMYK'][x.shape[-1]==4])
def load_image(fn, mode=None):
    "Open and load a `PIL.Image` and convert to `mode`"
    im = Image.open(fn)
    im.load()
    # NOTE(review): `_new` is a private Pillow method; presumably used to obtain a plain `Image`
    # built from the already-loaded pixel data — confirm against the Pillow version in use.
    im = im._new(im.im)
    return im.convert(mode) if mode else im
def image2tensor(img):
    "Transform image to byte tensor in `c*h*w` dim order."
    res = tensor(img)
    if res.dim()==2: res = res.unsqueeze(-1)  # 2-d input gets a trailing axis of size 1
    return res.permute(2,0,1)                 # last axis moves to the front
class PILBase(Image.Image, metaclass=BypassNewMeta):
    "Base class for a Pillow `Image` that can show itself and convert to a Tensor"
    # NOTE(review): the value of `_bypass_type` was lost in this extract; `Image.Image` (the parent
    # class) is what `BypassNewMeta` presumably expects here — confirm.
    _bypass_type=Image.Image
    # Defaults merged into `show` and `load_image` respectively; subclasses override them
    _show_args = {'cmap':'viridis'}
    _open_args = {'mode': 'RGB'}
    @classmethod
    def create(cls, fn:Path|str|Tensor|ndarray|bytes|Image.Image, **kwargs):
        "Return an Image from `fn`"
        # The checks run in order: tensors are reduced to ndarrays, bytes to a file-like object
        if isinstance(fn,TensorImage): fn = fn.permute(1,2,0).type(torch.uint8)
        if isinstance(fn,TensorMask): fn = fn.type(torch.uint8)
        if isinstance(fn,Tensor): fn = fn.numpy()
        if isinstance(fn,ndarray): return cls(Image.fromarray(fn))
        if isinstance(fn,bytes): fn = io.BytesIO(fn)
        if isinstance(fn,Image.Image): return cls(fn)
        return cls(load_image(fn, **merge(cls._open_args, kwargs)))

    def show(self, ctx=None, **kwargs):
        "Show image using `merge(self._show_args, kwargs)`"
        return show_image(self, ctx=ctx, **merge(self._show_args, kwargs))

    def __repr__(self): return f'{self.__class__.__name__} mode={self.mode} size={"x".join([str(d) for d in self.size])}'
以 PyTorch `Tensor`、NumPy `ndarray` 或 Pillow `Image` 形式传递给 `PILBase` 或其子类的 `create` 的图像,必须已经是正确的 Pillow 图像格式。例如,对于 `PILImage` 或 `PILImageBW`,图像必须为 `uint8`,并且分别是 RGB 或 BW。
class PILImage(PILBase):
    "A RGB Pillow `Image` that can show itself and converts to `TensorImage`"
class PILImageBW(PILImage):
    "A BW Pillow `Image` that can show itself and converts to `TensorImageBW`"
    # Grey colormap for display, single-channel mode 'L' when opening
    _show_args,_open_args = {'cmap':'Greys'},{'mode': 'L'}
# Create from a file path
im = PILImage.create(TEST_IMAGE)
test_eq(type(im), PILImage)
# The expected mode literal was lost in the extract; 'RGB' matches the repr string checked on the next line
test_eq(im.mode, 'RGB')
test_eq(str(im), 'PILImage mode=RGB size=1200x803')

# Create from an existing image
im2 = PILImage.create(im)
test_eq(type(im2), PILImage)
test_eq(im2.mode, 'RGB')
test_eq(str(im2), 'PILImage mode=RGB size=1200x803')

im.resize((64,64))
ax = im.show(figsize=(1,1))

# Round trip through a tensor gives back the same pixels
timg = TensorImage(image2tensor(im))
tpil = PILImage.create(timg)
tpil.resize((64,64))
test_eq(np.array(im), np.array(tpil))
class PILMask(PILBase):
    "A Pillow `Image` Mask that can show itself and converts to `TensorMask`"
    # Opened in single-channel mode 'L'; shown half-transparent with the 'tab20' colormap
    _open_args,_show_args = {'mode':'L'},{'alpha':0.5, 'cmap':'tab20'}
im = PILMask.create(TEST_IMAGE)
test_eq(type(im), PILMask)
# The expected mode literal was lost in the extract; 'L' matches the repr string checked on the next line
test_eq(im.mode, 'L')
test_eq(str(im), 'PILMask mode=L size=1200x803')
# Wrap `PILMask.create` in a `Transform`, attach a loss function to it, then rebind
# `PILMask.create` to the wrapped version.
OpenMask = Transform(PILMask.create)
OpenMask.loss_func = CrossEntropyLossFlat(axis=1)
PILMask.create = OpenMask
mnist = untar_data(URLs.MNIST_TINY)
fns = get_image_files(mnist)
mnist_fn = TEST_IMAGE_BW
timg = Transform(PILImageBW.create)
mnist_img = timg(mnist_fn)
test_eq(mnist_img.size, (28,28))
assert isinstance(mnist_img, PILImageBW)
class AddMaskCodes(Transform):
    "Add the code metadata to a `TensorMask`"
    def __init__(self, codes=None):
        self.codes = codes
        # `vocab` and `c` are only set when codes are supplied
        if codes is not None: self.vocab,self.c = codes,len(codes)

    def decodes(self, o:TensorMask):
        # Attach the codes to the decoded mask; `o` is returned unchanged when there are none
        if self.codes is not None: o.codes=self.codes
        return o
camvid = untar_data(URLs.CAMVID_TINY)
fns = get_image_files(camvid/'images')
cam_fn = fns[0]
# The label file has the image's stem plus a `_P` suffix, in the `labels` folder
mask_fn = camvid/'labels'/f'{cam_fn.stem}_P{cam_fn.suffix}'
cam_img = PILImage.create(cam_fn)
test_eq(cam_img.size, (128,96))
tmask = Transform(PILMask.create)
mask = tmask(mask_fn)
test_eq(type(mask), PILMask)
test_eq(mask.size, (128,96))

# Image, mask, and mask drawn over the image
_,axs = plt.subplots(1,3, figsize=(12,3))
cam_img.show(ctx=axs[0], title='image')
mask.show(alpha=1, ctx=axs[1], vmin=1, vmax=30, title='mask')
cam_img.show(ctx=axs[2], title='superimposed')
mask.show(ctx=axs[2], vmin=1, vmax=30);
class TensorPoint(TensorBase):
    "Basic type for points in an image"
    # Default scatter style; `show` lets caller kwargs override these keys
    _show_args = dict(s=10, marker='.', c='r')

    # NOTE(review): `->None` looks deliberate (fastcore `Transform` is understood to skip re-casting
    # the output when the return annotation is None) — confirm before changing it.
    @classmethod
    def create(cls, t, img_size=None)->None:
        "Convert an array or a list of points `t` to a `Tensor`"
        return cls(tensor(t).view(-1, 2).float(), img_size=img_size)

    def show(self, ctx=None, **kwargs):
        "Scatter the points on `ctx` and return it"
        if 'figsize' in kwargs: del kwargs['figsize']  # dropped before the kwargs reach `scatter`
        x = self.view(-1,2)
        ctx.scatter(x[:, 0], x[:, 1], **{**self._show_args, **kwargs})
        return ctx
# Wrap `TensorPoint.create` in a `Transform`, attach a loss function to it, then rebind
# `TensorPoint.create` to the wrapped version.
TensorPointCreate = Transform(TensorPoint.create)
TensorPointCreate.loss_func = MSELossFlat()
TensorPoint.create = TensorPointCreate
点应该以形状为 `(n,2)` 的数组/张量,或由两个元素的列表组成的列表的形式给出。除非您更改 `PointScaler` 中的默认设置(稍后会提到),坐标应在 0 到宽度/高度之间,第一个是列索引(从 0 到宽度),第二个是行索引(从 0 到高度)。
这与 numpy 或 PyTorch 中数组的通常索引约定不同,但这是 matplotlib 以及 PyTorch 内部函数(如 `F.grid_sample`)所期望的点的表示方式。
pnt_img = TensorImage(mnist_img.resize((28,35)))
# Four corners of the 28x35 image plus one interior point
pnts = np.array([[0,0], [0,35], [28,0], [28,35], [9, 17]])
tfm = Transform(TensorPoint.create)
tpnts = tfm(pnts)
test_eq(tpnts.shape, [5,2])
test_eq(tpnts.dtype, torch.float32)
ctx = pnt_img.show(figsize=(1,1), cmap='Greys')
tpnts.show(ctx=ctx);
def get_annotations(fname, prefix=None):
    "Open a COCO style json in `fname` and returns the lists of filenames (with maybe `prefix`) and labelled bboxes."
    # Context manager so the file handle is closed (it was previously left open)
    with open(fname) as f: annot_dict = json.load(f)
    id2bboxes, id2cats = collections.defaultdict(list), collections.defaultdict(list)
    classes = {o['id']:o['name'] for o in annot_dict['categories']}
    for o in annot_dict['annotations']:
        bb = o['bbox']
        # Stored as [x, y, w, h]; converted to [x1, y1, x2, y2]
        id2bboxes[o['image_id']].append([bb[0],bb[1], bb[0]+bb[2], bb[1]+bb[3]])
        # NOTE(review): this append was truncated in the extract; rebuilt from `classes` and the
        # `id2cats` lists returned below — confirm the key is 'category_id'.
        id2cats[o['image_id']].append(classes[o['category_id']])
    pre = '' if prefix is None else prefix
    # Images without any annotation are left out
    id2images = {o['id']:pre + o['file_name'] for o in annot_dict['images'] if o['id'] in id2bboxes}
    ids = list(id2images.keys())
    return [id2images[k] for k in ids], [(id2bboxes[k], id2cats[k]) for k in ids]
coco = untar_data(URLs.COCO_TINY)
test_images, test_lbl_bbox = get_annotations(coco/'train.json')
annotations = json.load(open(coco/'train.json'))
categories, images, annots = map(lambda x:L(x),annotations.values())

# The argument was truncated in the extract; 'file_name' is the key `bbox_lbls` reads from the same records
test_eq(test_images, images.attrgot('file_name'))
def bbox_lbls(file_name):
    "Rebuild `[bboxes, labels]` for `file_name` from the module-level `images`, `annots` and `categories` lists"
    img = images.filter(lambda img:img['file_name']==file_name)[0]
    bbs = annots.filter(lambda a:a['image_id'] == img['id'])
    i2o = {k['id']:k['name'] for k in categories}  # category id -> name
    lbls = [i2o[cat] for cat in bbs.attrgot('category_id')]
    # Stored as [x, y, w, h]; converted to [x1, y1, x2, y2]
    bboxes = [[bb[0],bb[1], bb[0]+bb[2], bb[1]+bb[3]] for bb in bbs.attrgot('bbox')]
    return [bboxes, lbls]
# Compare `get_annotations` with the direct lookup on five random images
for idx in random.sample(range(len(images)),5):
    test_eq(test_lbl_bbox[idx], bbox_lbls(test_images[idx]))
from matplotlib import patches, patheffects
def _draw_outline(o, lw):
    "Give `o` a black stroke of width `lw` followed by its normal rendering"
    # NOTE(review): the start of this statement was lost in the extract; rebuilt from the surviving
    # `=lw, foreground='black'), patheffects.Normal()])` tail — confirm.
    o.set_path_effects([patheffects.Stroke(linewidth=lw, foreground='black'), patheffects.Normal()])
def _draw_rect(ax, b, color='white', text=None, text_size=14, hw=True, rev=False):
    "Draw box `b` on `ax` as an outlined rectangle, with optional `text` at its (lx, ly) corner"
    lx,ly,w,h = b
    if rev: lx,ly,w,h = ly,lx,h,w       # swap the x and y entries
    if not hw: w,h = w-lx,h-ly          # last two values are a second corner: turn them into width/height
    patch = ax.add_patch(patches.Rectangle((lx,ly), w, h, fill=False, edgecolor=color, lw=2))
    _draw_outline(patch, 4)
    if text is not None:
        patch = ax.text(lx,ly, text, verticalalignment='top', color=color, fontsize=text_size, weight='bold')
        _draw_outline(patch, 1)
class TensorBBox(TensorPoint):
    "Basic type for a tensor of bounding boxes in an image"
    # `@classmethod` restored: the method takes `cls` and is called as `TensorBBox.create(...)`.
    # NOTE(review): `->None` looks deliberate (see fastcore `Transform` return-annotation handling) — confirm.
    @classmethod
    def create(cls, x, img_size=None)->None: return cls(tensor(x).view(-1, 4).float(), img_size=img_size)

    def show(self, ctx=None, **kwargs):
        "Draw every box on `ctx` and return it"
        x = self.view(-1,4)
        # `hw=False`: the last two values of each row are a second corner, not width/height
        for b in x: _draw_rect(ctx, b, hw=False, **kwargs)
        return ctx
边界框应该作为一个元组传递:其中包含形状为 `(n,4)` 的数组/张量(或由四个元素的列表组成的列表),以及相应标签的列表。除非你在 `PointScaler` 中更改默认值(见后文),否则每个边界框的坐标应在 0 到宽度/高度之间,遵循以下约定:x1, y1, x2, y2,其中 (x1,y1) 是左上角,(x2,y2) 是右下角。
我们使用与点相同的约定,x 从 0 到宽度,y 从 0 到高度。
class LabeledBBox(L):
    "Basic type for a list of bounding boxes in an image"
    def show(self, ctx=None, **kwargs):
        "Draw each box with its label as text, skipping boxes labelled '#na#'"
        for b,l in zip(self.bbox, self.lbl):
            if l != '#na#': ctx = retain_type(b, self.bbox).show(ctx=ctx, text=l)
        return ctx

    # NOTE(review): presumably defines `bbox` as item 0 and `lbl` as item 1 — confirm `add_props` defaults.
    bbox,lbl = add_props(lambda i,self: self[i])
coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
idx=2
coco_fn,bbox = coco/'train'/images[idx],lbl_bbox[idx]
coco_img = timg(coco_fn)
tbbox = LabeledBBox(TensorBBox(bbox[0]), bbox[1])
ctx = coco_img.show(figsize=(3,3), cmap='Greys')
tbbox.show(ctx=ctx);
# Tensor class that each PIL class converts to
PILImage  ._tensor_cls = TensorImage
PILImageBW._tensor_cls = TensorImageBW
PILMask   ._tensor_cls = TensorMask

# NOTE(review): the `@ToTensor` decorator lines were missing from this extract; restored because the
# printed transform below lists `encodes` overloads for `PILMask` and `PILBase` — confirm.
@ToTensor
def encodes(self, o:PILBase): return o._tensor_cls(image2tensor(o))
# Masks keep only index 0 along the first axis of the converted tensor
@ToTensor
def encodes(self, o:PILMask): return o._tensor_cls(image2tensor(o)[0])
tfm = ToTensor()
print(tfm)
encodes: (PILMask,object) -> encodes
(PILBase,object) -> encodes
(PILMask,object) -> encodes
(PILBase,object) -> encodes
<class '__main__.PILImageBW'>
<class 'fastai.torch_core.TensorImageBW'>
tfm = ToTensor()
test_eq(tfm(mnist_img).shape, (1,28,28))
test_eq(type(tfm(mnist_img)), TensorImageBW)
# The expected shape was lost in the extract; (96,128) follows from `mask.size == (128,96)` checked earlier
test_eq(tfm(mask).shape, (96,128))
test_eq(type(tfm(mask)), TensorMask)
pipe_img = Pipeline([PILImageBW.create, ToTensor()])
img = pipe_img(mnist_fn)
test_eq(type(img), TensorImageBW)
pipe_img.show(img, figsize=(1,1));
def _cam_lbl(x):
    "Label getter for the one-item CamVid dataset: ignores `x` and returns the module-level `mask_fn`"
    return mask_fn
cam_tds = Datasets([cam_fn], [[PILImage.create, ToTensor()], [_cam_lbl, PILMask.create, ToTensor()]])
show_at(cam_tds, 0);
为了进行数据增强,特别是使用 `grid_sample` 方法时,点需要用 -1 到 1 之间的坐标表示(-1 为顶部或左侧,1 为底部或右侧),除非您传递 `do_scale=False`。我们还需要确保它们遵循我们对点采用的 x,y 坐标约定,因此如果您的数据是 y,x 格式,请传递 `y_first=True`。
def _scale_pnts(y, sz, do_scale=True, y_first=False):
    "Return points `y` as a `TensorPoint`, mapped from 0..`sz` to -1..1 unless `do_scale` is False"
    if y_first: y = y.flip(1)  # swap the two columns
    res = y * 2/tensor(sz).float() - 1 if do_scale else y
    return TensorPoint(res, img_size=sz)
def _unscale_pnts(y, sz):
    "Map points `y` from -1..1 back to 0..`sz` and return them as a `TensorPoint`"
    size = tensor(sz).float()
    return TensorPoint((y+1) * size/2, img_size=sz)
class PointScaler(Transform):
    "Scale a tensor representing points"
    order = 1
    def __init__(self, do_scale=True, y_first=False): self.do_scale,self.y_first = do_scale,y_first
    def _grab_sz(self, x):
        "Remember the size of image `x` and pass `x` through unchanged"
        # Tensors: last two dims reversed; otherwise the object's own `size`
        self.sz = [x.shape[-1], x.shape[-2]] if isinstance(x, Tensor) else x.size
        return x

    def _get_sz(self, x):
        "Size to scale with: the last grabbed image size, else `x.img_size`"
        # `sz` is only assigned in `_grab_sz`; default to None so that points seen before any image
        # fall back to their own `img_size` instead of raising AttributeError on `self.sz`.
        sz = getattr(self, 'sz', None)
        return getattr(x, 'img_size') if sz is None else sz

    def setups(self, dl):
        # `c` is the element count of the first `TensorPoint` found in an item from `dl`
        res = first(dl.do_item(None), risinstance(TensorPoint))
        if res is not None: self.c = res.numel()

    def encodes(self, x:PILBase|TensorImageBase): return self._grab_sz(x)
    def decodes(self, x:PILBase|TensorImageBase): return self._grab_sz(x)

    def encodes(self, x:TensorPoint): return _scale_pnts(x, self._get_sz(x), self.do_scale, self.y_first)
    def decodes(self, x:TensorPoint): return _unscale_pnts(x.view(-1, 2), self._get_sz(x))
def _pnt_lbl(x):
    "Label getter for the one-item points dataset: ignores `x` and builds a `TensorPoint` from the module-level `pnts`"
    return TensorPoint.create(pnts)
def _pnt_open(fn):
    "Open `fn` and return it as a `PILImage` resized to (28,35)"
    resized = PILImage.create(fn).resize((28,35))
    return PILImage(resized)
pnt_tds = Datasets([mnist_fn], [_pnt_open, [_pnt_lbl]])
pnt_tdl = TfmdDL(pnt_tds, bs=1, after_item=[PointScaler(), ToTensor()])
test_eq(pnt_tdl.after_item.c, 10)

tfm = PointScaler()
tfm.as_item=False
# The assignment target was lost in the extract; `x,y` matches the names used in the checks below
x,y = tfm(pnt_tds[0])
test_eq(tfm.sz, x.size)
test_eq(y.img_size, x.size)

x,y = pnt_tdl.one_batch()
# Scaling and flipping done correctly
test_close(y[0], tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.], [9/14-1, 17/17.5-1]]))
a,b = pnt_tdl.decode_batch((x,y))[0]
test_eq(b, tensor(pnts).float())
# Check types
test_eq(type(x), TensorImage)
test_eq(type(y), TensorPoint)
test_eq(type(a), TensorImage)
test_eq(type(b), TensorPoint)
test_eq(b.img_size, (28,35)) # Size of the input picked up automatically

pnt_tdl.show_batch(figsize=(2,2), cmap='Greys');
class BBoxLabeler(Transform):
    "Combine decoded boxes and decoded labels into a `LabeledBBox`"
    def setups(self, dl): self.vocab = dl.vocab

    def decode (self, x, **kwargs):
        # Reset both halves before each decode so a pair is never built from a previous call's state
        self.bbox,self.lbls = None,None
        return self._call('decodes', x, **kwargs)

    # Whichever of labels / boxes arrives second returns the combined `LabeledBBox`;
    # the first one to arrive is returned as is.
    def decodes(self, x:TensorMultiCategory):
        self.lbls = [self.vocab[a] for a in x]
        return x if self.bbox is None else LabeledBBox(self.bbox, self.lbls)

    def decodes(self, x:TensorBBox):
        self.bbox = x
        return self.bbox if self.lbls is None else LabeledBBox(self.bbox, self.lbls)
# A LabeledBBox can be sent through a tl together with MultiCategorize (depending on the order of
# the tls), but it is already decoded, so it is returned unchanged.
# NOTE(review): the decorator line was missing from this extract; `@MultiCategorize` is inferred
# from the comment above — confirm the target transform.
@MultiCategorize
def decodes(self, x:LabeledBBox): return x
# Boxes are scaled by viewing each 4-value row as two points and reusing the `TensorPoint` overloads.
# NOTE(review): the `@PointScaler` decorator lines were missing from this extract; restored because
# the bodies call `self.encodes` / `self.decodes` with a `TensorPoint` — confirm.
@PointScaler
def encodes(self, x:TensorBBox):
    pnts = self.encodes(cast(x.view(-1,2), TensorPoint))
    return cast(pnts.view(-1, 4), TensorBBox)

@PointScaler
def decodes(self, x:TensorBBox):
    pnts = self.decodes(cast(x.view(-1,2), TensorPoint))
    return cast(pnts.view(-1, 4), TensorBBox)
def _coco_bb(x):
    "Box getter for the one-item COCO dataset: ignores `x` and builds a `TensorBBox` from the module-level `bbox[0]`"
    return TensorBBox.create(bbox[0])
def _coco_lbl(x):
    "Label getter for the one-item COCO dataset: ignores `x` and returns the module-level `bbox[1]`"
    return bbox[1]
coco_tds = Datasets([coco_fn], [PILImage.create, [_coco_bb], [_coco_lbl, MultiCategorize(add_na=True)]], n_inp=1)
coco_tdl = TfmdDL(coco_tds, bs=1, after_item=[BBoxLabeler(), PointScaler(), ToTensor()])

tfm = PointScaler()
tfm.as_item=False
# The assignment target was lost in the extract; `x,y,z` matches the three pipelines of `coco_tds`
x,y,z = tfm(coco_tds[0])
test_eq(tfm.sz, x.size)
test_eq(y.img_size, x.size)
Categorize(add_na=True)
Categorize -- {'vocab': None, 'sort': True, 'add_na': True}:
encodes: (object,object) -> encodes
decodes: (object,object) -> decodes
(#3) [Pipeline: PILBase.create,Pipeline: _coco_bb,Pipeline: _coco_lbl -> MultiCategorize -- {'vocab': None, 'sort': True, 'add_na': True}]
(PILImage mode=RGB size=128x128,
TensorBBox([[-0.9011, -0.4606, 0.1416, 0.6764],
[ 0.2000, -0.2405, 1.0000, 0.9102],
[ 0.4909, -0.9325, 0.9284, -0.5011]]),
TensorMultiCategory([1, 1, 1]))
x,y,z = coco_tdl.one_batch()
test_close(y[0], -1+tensor(bbox[0])/64)
test_eq(z[0], tensor([1,1,1]))
a,b,c = coco_tdl.decode_batch((x,y,z))[0]
test_close(b, tensor(bbox[0]).float())
test_eq(c.bbox, b)
test_eq(c.lbl, bbox[1])

# Check types
test_eq(type(x), TensorImage)
test_eq(type(y), TensorBBox)
test_eq(type(z), TensorMultiCategory)
test_eq(type(a), TensorImage)
test_eq(type(b), TensorBBox)
test_eq(type(c), LabeledBBox)
test_eq(y.img_size, (128,128))

coco_tdl.show_batch();
# Same checks with the label pipeline placed before the box pipeline
coco_tds = Datasets([coco_fn], [PILImage.create, [_coco_lbl, MultiCategorize(add_na=True)], [_coco_bb]])
coco_tdl = TfmdDL(coco_tds, bs=1, after_item=[BBoxLabeler(), PointScaler(), ToTensor()])

x,y,z = coco_tdl.one_batch()
test_close(z[0], -1+tensor(bbox[0])/64)
test_eq(y[0], tensor([1,1,1]))
a,b,c = coco_tdl.decode_batch((x,y,z))[0]
test_eq(b, bbox[1])
test_close(c.bbox, tensor(bbox[0]).float())
test_eq(c.lbl, b)

# Check types
test_eq(type(x), TensorImage)
test_eq(type(y), TensorMultiCategory)
test_eq(type(z), TensorBBox)
test_eq(type(a), TensorImage)
test_eq(type(b), MultiCategory)
test_eq(type(c), LabeledBBox)
test_eq(z.img_size, (128,128))
导出 -
from nbdev import nbdev_export