! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
视觉数据
from __future__ import annotations
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.vision.core import *
import types
from nbdev.showdoc import *
# 来自 fastai 视觉增强库的导入:*
用于在视觉应用程序中获取 `DataLoaders` 的辅助函数,以及更高级的 `ImageDataLoaders` 类

本模块中定义的主要类是 `ImageDataLoaders` 和 `SegmentationDataLoaders`,所以你可能想直接查看它们的定义。它们提供的工厂方法是快速准备训练数据的好方法,更多示例请参见视觉教程。
辅助函数
@delegates(subplots)
def get_grid(
    n:int, # Number of axes in the returned grid
    nrows:int=None, # Number of rows in the returned grid, defaulting to `int(math.sqrt(n))`
    ncols:int=None, # Number of columns in the returned grid, defaulting to `ceil(n/rows)`
    figsize:tuple=None, # Width, height in inches of the returned figure
    double:bool=False, # Whether to double the number of columns and `n`
    title:str=None, # If passed, title set to the figure
    return_fig:bool=False, # Whether to return the figure created by `subplots`
    flatten:bool=True, # Whether to flatten the matplot axes such that they can be iterated over with a single loop
    **kwargs,
) -> (plt.Figure, plt.Axes): # Returns just `axs` by default, and (`fig`, `axs`) if `return_fig` is set to True
    "Return a grid of `n` axes, `rows` by `cols`"
    # Derive whichever grid dimension was not supplied from `n` and the other one.
    if nrows:
        ncols = ncols or int(np.ceil(n/nrows))
    elif ncols:
        nrows = nrows or int(np.ceil(n/ncols))
    else:
        nrows = int(math.sqrt(n))
        ncols = int(np.ceil(n/nrows))
    if double:
        ncols *= 2
        n *= 2
    fig,axs = subplots(nrows, ncols, figsize=figsize, **kwargs)
    if flatten:
        flat = axs.flatten()
        # The grid may hold more cells than requested: hide the surplus axes and keep the first `n`.
        for ax in flat[n:]: ax.set_axis_off()
        axs = list(flat[:n])
    if title is not None: fig.suptitle(title, weight='bold', size=14)
    return (fig,axs) if return_fig else axs
这是由 `show_batch` 和 `show_results` 的类型分派版本用于视觉应用的。默认的 `figsize` 是 `(cols*imsize, rows*imsize+0.6)`。`imsize` 会传递给 `subplots`。`suptitle`、`sharex`、`sharey`、`squeeze`、`subplot_kw` 和 `gridspec_kw` 都被传递给 `plt.subplots`。如果 `return_fig` 为 `True`,则返回 `fig, axs`,否则仅返回 `axs`。
def clip_remove_empty(
    bbox:TensorBBox, # Coordinates of bounding boxes
    label:TensorMultiCategory # Labels of the bounding boxes
):
    "Clip bounding boxes with image border and remove empty boxes along with corresponding labels"
    # Clamp every coordinate into [-1, 1].
    bbox = torch.clamp(bbox, -1, 1)
    # A box is "empty" when (coord2 - coord0) * (coord3 - coord1) is not strictly positive after clamping.
    empty = ((bbox[...,2] - bbox[...,0])*(bbox[...,3] - bbox[...,1]) <= 0.)
    # The mask is wrapped in `TensorBase` before indexing `label`.
    return (bbox[~empty], label[TensorBase(~empty)])
这是用于 `bb_pad` 的。
bb = TensorBBox([[-2,-0.5,0.5,1.5], [-0.5,-0.5,0.5,0.5], [1,0.5,0.5,0.75], [-0.5,-0.5,0.5,0.5], [-2, -0.5, -1.5, 0.5]])
bb,lbl = clip_remove_empty(bb, TensorMultiCategory([1,2,3,2,5]))
# The third and fifth boxes have zero area once clamped, so they are dropped with their labels.
test_eq(bb, TensorBBox([[-1,-0.5,0.5,1.], [-0.5,-0.5,0.5,0.5], [-0.5,-0.5,0.5,0.5]]))
test_eq(lbl, TensorMultiCategory([1,2,2]))
def bb_pad(
    samples:list, # List of 3-tuples like (image, bounding_boxes, labels)
    pad_idx=0 # Label that will be used to pad each list of labels
):
    "Function that collects `samples` of labelled bboxes and adds padding with `pad_idx`."
    # Clip boxes and drop empty ones first, so padding is sized on what actually remains.
    samples = [(s[0], *clip_remove_empty(*s[1:])) for s in samples]
    max_len = max([len(s[2]) for s in samples])
    def _f(img,bbox,lbl):
        # Pad boxes with all-zero rows and labels with `pad_idx` up to `max_len`.
        bbox = torch.cat([bbox,bbox.new_zeros(max_len-bbox.shape[0], 4)])
        lbl  = torch.cat([lbl, lbl .new_zeros(max_len-lbl .shape[0])+pad_idx])
        return img,bbox,lbl
    return [_f(*s) for s in samples]
这在 `BBoxBlock` 中使用。
img1,img2 = TensorImage(torch.randn(16,16,3)),TensorImage(torch.randn(16,16,3))
bb1 = tensor([[-2,-0.5,0.5,1.5], [-0.5,-0.5,0.5,0.5], [1,0.5,0.5,0.75], [-0.5,-0.5,0.5,0.5]])
lbl1 = tensor([1, 2, 3, 2])
bb2 = tensor([[-0.5,-0.5,0.5,0.5], [-0.5,-0.5,0.5,0.5]])
lbl2 = tensor([2, 2])
samples = [(img1, bb1, lbl1), (img2, bb2, lbl2)]
res = bb_pad(samples)
non_empty = tensor([True,True,False,True])
test_eq(res[0][0], img1)
test_eq(res[0][1], tensor([[-1,-0.5,0.5,1.], [-0.5,-0.5,0.5,0.5], [-0.5,-0.5,0.5,0.5]]))
test_eq(res[0][2], tensor([1,2,2]))
test_eq(res[1][0], img2)
# The second sample has only two boxes, so it is padded with a zero box and label `pad_idx` (0).
test_eq(res[1][1], tensor([[-0.5,-0.5,0.5,0.5], [-0.5,-0.5,0.5,0.5], [0,0,0,0]]))
test_eq(res[1][2], tensor([2,2,0]))
显示方法 -
@typedispatch
def show_batch(x:TensorImage, y, samples, ctxs=None, max_n=10, nrows=None, ncols=None, figsize=None, **kwargs):
    "Show up to `max_n` `samples` with image inputs, creating a grid of axes when `ctxs` is not given"
    if ctxs is None: ctxs = get_grid(min(len(samples), max_n), nrows=nrows, ncols=ncols, figsize=figsize)
    # Delegate the actual drawing to the `object` implementation registered on `show_batch`.
    ctxs = show_batch[object](x, y, samples, ctxs=ctxs, max_n=max_n, **kwargs)
    return ctxs
@typedispatch
def show_batch(x:TensorImage, y:TensorImage, samples, ctxs=None, max_n=10, nrows=None, ncols=None, figsize=None, **kwargs):
    "Show up to `max_n` image-to-image `samples`, input and target in alternating axes"
    # `double=True` doubles the number of columns so each sample gets two axes.
    if ctxs is None: ctxs = get_grid(min(len(samples), max_n), nrows=nrows, ncols=ncols, figsize=figsize, double=True)
    for i in range(2):
        # Element `i` of every sample goes on every second axis starting at offset `i`; `range(max_n)` caps the count.
        ctxs[i::2] = [b.show(ctx=c, **kwargs) for b,c,_ in zip(samples.itemgot(i),ctxs[i::2],range(max_n))]
    return ctxs
用于视觉的 TransformBlock
这些是视觉应用为数据块API提供的模块。
def ImageBlock(cls:PILBase=PILImage):
    "A `TransformBlock` for images of `cls`"
    # Items are opened with `cls.create`; `IntToFloatTensor` is applied as the batch transform.
    block = TransformBlock(type_tfms=cls.create, batch_tfms=IntToFloatTensor)
    return block
def MaskBlock(
    codes:list=None # Vocab labels for segmentation masks
):
    "A `TransformBlock` for segmentation masks, potentially with `codes`"
    return TransformBlock(type_tfms=PILMask.create, item_tfms=AddMaskCodes(codes=codes), batch_tfms=IntToFloatTensor)
PointBlock = TransformBlock(type_tfms=TensorPoint.create, item_tfms=PointScaler)
# `bb_pad` runs before batching so every sample in a batch ends up with the same number of boxes.
BBoxBlock = TransformBlock(type_tfms=TensorBBox.create, item_tfms=PointScaler, dls_kwargs = {'before_batch': bb_pad})

PointBlock.__doc__ = "A `TransformBlock` for points in an image"
BBoxBlock.__doc__  = "A `TransformBlock` for bounding boxes in an image"
show_doc(PointBlock, name='PointBlock')
PointBlock
A TransformBlock
for points in an image
show_doc(BBoxBlock, name='BBoxBlock')
BBoxBlock
A TransformBlock
for bounding boxes in an image
def BBoxLblBlock(
    vocab:list=None, # Vocab labels for bounding boxes
    add_na:bool=True # Add NaN as a background class
):
    "A `TransformBlock` for labeled bounding boxes, potentially with `vocab`"
    return TransformBlock(type_tfms=MultiCategorize(vocab=vocab, add_na=add_na), item_tfms=BBoxLabeler)
如果 `add_na` 为 `True`,将为 NaN 添加一个新类别(这将代表背景类)。
图像数据加载器 -
class ImageDataLoaders(DataLoaders):
    "Basic wrapper around several `DataLoader`s with factory methods for computer vision problems"
    @classmethod
    @delegates(DataLoaders.from_dblock)
    def from_folder(cls, path, train='train', valid='valid', valid_pct=None, seed=None, vocab=None, item_tfms=None,
                    batch_tfms=None, img_cls=PILImage, **kwargs):
        "Create from imagenet style dataset in `path` with `train` and `valid` subfolders (or provide `valid_pct`)"
        # Without `valid_pct` the split follows the `train`/`valid` folder names; otherwise it is random.
        splitter = GrandparentSplitter(train_name=train, valid_name=valid) if valid_pct is None else RandomSplitter(valid_pct, seed=seed)
        # NOTE(review): this is a truthiness test while the splitter above uses `is None`, so `valid_pct=0`
        # gets a `RandomSplitter` but still only collects files under `train`/`valid` -- confirm intended.
        get_items = get_image_files if valid_pct else partial(get_image_files, folders=[train, valid])
        dblock = DataBlock(blocks=(ImageBlock(img_cls), CategoryBlock(vocab=vocab)),
                           get_items=get_items,
                           splitter=splitter,
                           get_y=parent_label,
                           item_tfms=item_tfms,
                           batch_tfms=batch_tfms)
        return cls.from_dblock(dblock, path, path=path, **kwargs)

    @classmethod
    @delegates(DataLoaders.from_dblock)
    def from_path_func(cls, path, fnames, label_func, valid_pct=0.2, seed=None, item_tfms=None, batch_tfms=None,
                       img_cls=PILImage, **kwargs):
        "Create from list of `fnames` in `path`s with `label_func`"
        dblock = DataBlock(blocks=(ImageBlock(img_cls), CategoryBlock),
                           splitter=RandomSplitter(valid_pct, seed=seed),
                           get_y=label_func,
                           item_tfms=item_tfms,
                           batch_tfms=batch_tfms)
        return cls.from_dblock(dblock, fnames, path=path, **kwargs)

    @classmethod
    def from_name_func(cls,
        path:str|Path, # Set the default path to a directory that a `Learner` can use to save files like models
        fnames:list, # A list of `os.Pathlike`'s to individual image files
        label_func:callable, # A function that receives a string (the file name) and outputs a label
        **kwargs
    ) -> DataLoaders:
        "Create from the name attrs of `fnames` in `path`s with `label_func`"
        if sys.platform == 'win32' and isinstance(label_func, types.LambdaType) and label_func.__name__ == '<lambda>':
            # https://medium.com/@jwnx/multiprocessing-serialization-in-python-with-pickle-9844f6fa1812
            raise ValueError("label_func couldn't be lambda function on Windows")
        # Wrap `label_func` so that it receives the `name` attribute of each item rather than the item itself.
        f = using_attr(label_func, 'name')
        return cls.from_path_func(path, fnames, f, **kwargs)

    @classmethod
    def from_path_re(cls, path, fnames, pat, **kwargs):
        "Create from list of `fnames` in `path`s with re expression `pat`"
        return cls.from_path_func(path, fnames, RegexLabeller(pat), **kwargs)

    # No `@delegates(DataLoaders.from_dblock)` here: it would strip `**kwargs` from the signature, which makes
    # the module-level `delegates(to=ImageDataLoaders.from_name_func)` applied after the class a no-op and
    # hides `valid_pct`, `seed`, `item_tfms`, ... from the documented signature. Runtime behaviour is unchanged.
    @classmethod
    def from_name_re(cls, path, fnames, pat, **kwargs):
        "Create from the name attrs of `fnames` in `path`s with re expression `pat`"
        return cls.from_name_func(path, fnames, RegexLabeller(pat), **kwargs)

    @classmethod
    @delegates(DataLoaders.from_dblock)
    def from_df(cls, df, path='.', valid_pct=0.2, seed=None, fn_col=0, folder=None, suff='', label_col=1, label_delim=None,
                y_block=None, valid_col=None, item_tfms=None, batch_tfms=None, img_cls=PILImage, **kwargs):
        "Create from `df` using `fn_col` and `label_col`"
        # Prefix prepended to every file name read from `fn_col`: `path` (plus `folder` if given) and a separator.
        pref = f'{Path(path) if folder is None else Path(path)/folder}{os.path.sep}'
        if y_block is None:
            # Several label columns or a label delimiter mean a multi-label target.
            is_multi = (is_listy(label_col) and len(label_col) > 1) or label_delim is not None
            y_block = MultiCategoryBlock if is_multi else CategoryBlock
        splitter = RandomSplitter(valid_pct, seed=seed) if valid_col is None else ColSplitter(valid_col)
        dblock = DataBlock(blocks=(ImageBlock(img_cls), y_block),
                           get_x=ColReader(fn_col, pref=pref, suff=suff),
                           get_y=ColReader(label_col, label_delim=label_delim),
                           splitter=splitter,
                           item_tfms=item_tfms,
                           batch_tfms=batch_tfms)
        return cls.from_dblock(dblock, df, path=path, **kwargs)

    @classmethod
    def from_csv(cls, path, csv_fname='labels.csv', header='infer', delimiter=None, quoting=csv.QUOTE_MINIMAL, **kwargs):
        "Create from `path/csv_fname` using `fn_col` and `label_col`"
        df = pd.read_csv(Path(path)/csv_fname, header=header, delimiter=delimiter, quoting=quoting)
        return cls.from_df(df, path=path, **kwargs)

    @classmethod
    @delegates(DataLoaders.from_dblock)
    def from_lists(cls, path, fnames, labels, valid_pct=0.2, seed:int=None, y_block=None, item_tfms=None, batch_tfms=None,
                   img_cls=PILImage, **kwargs):
        "Create from list of `fnames` and `labels` in `path`"
        if y_block is None:
            # Infer the target block from the first label: list with >1 items -> multi-label, float -> regression.
            y_block = MultiCategoryBlock if is_listy(labels[0]) and len(labels[0]) > 1 else (
                RegressionBlock if isinstance(labels[0], float) else CategoryBlock)
        dblock = DataBlock.from_columns(blocks=(ImageBlock(img_cls), y_block),
                           splitter=RandomSplitter(valid_pct, seed=seed),
                           item_tfms=item_tfms,
                           batch_tfms=batch_tfms)
        return cls.from_dblock(dblock, (fnames, labels), path=path, **kwargs)
# Methods that forward `**kwargs` to another factory method get that method's keyword arguments in their signature.
ImageDataLoaders.from_csv = delegates(to=ImageDataLoaders.from_df)(ImageDataLoaders.from_csv)
ImageDataLoaders.from_name_func = delegates(to=ImageDataLoaders.from_path_func)(ImageDataLoaders.from_name_func)
ImageDataLoaders.from_path_re = delegates(to=ImageDataLoaders.from_path_func)(ImageDataLoaders.from_path_re)
ImageDataLoaders.from_name_re = delegates(to=ImageDataLoaders.from_name_func)(ImageDataLoaders.from_name_re)
这个类不应该直接使用,而应该优先考虑其中一个工厂方法。所有这些工厂方法接受以下作为参数:

- `item_tfms`:在将项目分批之前应用于项目的一个或多个变换
- `batch_tfms`:在批次形成后应用于批次的一个或多个变换
- `bs`:批次大小
- `val_bs`:用于验证 `DataLoader` 的批次大小(默认为 `bs`)
- `shuffle_train`:是否对训练 `DataLoader` 进行洗牌
- `device`:要使用的 PyTorch 设备(默认为 `default_device()`)
show_doc(ImageDataLoaders.from_folder)
ImageDataLoaders.from_folder
ImageDataLoaders.from_folder (path, train='train', valid='valid', valid_pct=None, seed=None, vocab=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from imagenet style dataset in path
with train
and valid
subfolders (or provide valid_pct
)
Type | Default | Details | |
---|---|---|---|
path | str | Path | . | Path to put in DataLoaders |
train | str | train | |
valid | str | valid | |
valid_pct | NoneType | None | |
seed | NoneType | None | |
vocab | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
如果提供了 `valid_pct`,则会通过设置一定比例的数据用于验证集进行随机拆分(可以选择设置一个 `seed`)。如果传递了 `vocab`,则仅保留名称在 `vocab` 中的文件夹。
以下是加载 MNIST 子样本的示例:
path = untar_data(URLs.MNIST_TINY)
dls = ImageDataLoaders.from_folder(path, img_cls=PILImageBW)
x,y = dls.one_batch()
test_eq(x.shape, [64, 1, 28, 28])
传递 `valid_pct` 将忽略 `valid`/`train` 文件夹并进行新的随机划分:
dls = ImageDataLoaders.from_folder(path, valid_pct=0.2)
dls.valid_ds.items[:3]
[Path('/home/jhoward/.fastai/data/mnist_tiny/train/7/9307.png'),
Path('/home/jhoward/.fastai/data/mnist_tiny/train/3/8241.png'),
Path('/home/jhoward/.fastai/data/mnist_tiny/valid/3/8924.png')]
show_doc(ImageDataLoaders.from_path_func)
ImageDataLoaders.from_path_func
ImageDataLoaders.from_path_func (path, fnames, label_func, valid_pct=0.2, seed=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from list of fnames
in path
s with label_func
Type | Default | Details | |
---|---|---|---|
path | str | Path | . | Path to put in DataLoaders |
fnames | |||
label_func | |||
valid_pct | float | 0.2 | |
seed | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
验证集是一个随机子集,其比例由 `valid_pct` 决定,可以选择使用 `seed` 来确保结果的可重复性。

以下是如何使用 `label_func` 在 MNIST 数据集上创建与之前示例相同的 `DataLoaders`:
fnames = get_image_files(path)
def label_func(x): return x.parent.name
dls = ImageDataLoaders.from_path_func(path, fnames, label_func)
这是另一个关于宠物数据集的例子。这里的文件名都在一个“images”文件夹中,名称的格式为 `class_name_123.jpg`。正确标记它们的一种方法是将最后一个 `_` 之后的所有内容丢弃:
show_doc(ImageDataLoaders.from_path_re)
ImageDataLoaders.from_path_re
ImageDataLoaders.from_path_re (path, fnames, pat, valid_pct=0.2, seed=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from list of fnames
in path
s with re expression pat
Type | Default | Details | |
---|---|---|---|
path | str | Path | . | Path to put in DataLoaders |
fnames | |||
pat | |||
valid_pct | float | 0.2 | |
seed | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
验证集是比例为 `valid_pct` 的一个随机子集,可选地使用 `seed` 来实现可重复性。

以下是如何在 MNIST 数据集上创建与之前示例相同的 `DataLoaders`(在 Windows 上,您需要将前两个 `/` 更改为 `\`):
pat = r'/([^/]*)/\d+.png$'
dls = ImageDataLoaders.from_path_re(path, fnames, pat)
show_doc(ImageDataLoaders.from_name_func)
ImageDataLoaders.from_name_func
ImageDataLoaders.from_name_func (path:str|Path, fnames:list, label_func:callable, valid_pct=0.2, seed=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from the name attrs of fnames
in path
s with label_func
Type | Default | Details | |
---|---|---|---|
path | str | Path | Set the default path to a directory that a Learner can use to save files like models |
|
fnames | list | A list of os.Pathlike ’s to individual image files |
|
label_func | callable | A function that receives a string (the file name) and outputs a label | |
valid_pct | float | 0.2 | |
seed | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
Returns | DataLoaders |
验证集是一个随机子集,大小为 `valid_pct`,可以选择使用 `seed` 来确保可重复性。此方法的功能与 `ImageDataLoaders.from_path_func` 相同,只是 `label_func` 应用于每个文件名的名称,而不是完整路径。
show_doc(ImageDataLoaders.from_name_re)
ImageDataLoaders.from_name_re
ImageDataLoaders.from_name_re (path, fnames, pat, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from the name attrs of fnames
in path
s with re expression pat
Type | Default | Details | |
---|---|---|---|
path | str | Path | . | Path to put in DataLoaders |
fnames | |||
pat | |||
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
验证集是比例为 `valid_pct` 的随机子集,可以选择使用 `seed` 来保证可重复性。该方法的功能与 `ImageDataLoaders.from_path_re` 相同,不同之处在于 `pat` 应用于每个文件名的名称,而不是完整路径。
show_doc(ImageDataLoaders.from_df)
ImageDataLoaders.from_df
ImageDataLoaders.from_df (df, path='.', valid_pct=0.2, seed=None, fn_col=0, folder=None, suff='', label_col=1, label_delim=None, y_block=None, valid_col=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from df
using fn_col
and label_col
Type | Default | Details | |
---|---|---|---|
df | |||
path | str | Path | . | Path to put in DataLoaders |
valid_pct | float | 0.2 | |
seed | NoneType | None | |
fn_col | int | 0 | |
folder | NoneType | None | |
suff | str | ||
label_col | int | 1 | |
label_delim | NoneType | None | |
y_block | NoneType | None | |
valid_col | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
验证集是比例为 `valid_pct` 的随机子集,可选择性地使用 `seed` 创建以实现可重复性。或者,如果您的 `df` 中包含 `valid_col`,请将其名称或索引提供给该参数(该列应对进入验证集的元素标记为 `True`)。

如果 `df` 中的文件名不应直接连接到 `path`,您可以向文件名添加额外的 `folder`。如果它们不包含正确的扩展名,您可以添加 `suff`。如果您的标签列在每一行中包含多个标签,您可以使用 `label_delim` 来提醒库您有一个多标签问题。

当库自动选择的任务错误时,应传入 `y_block`,然后您应提供 `CategoryBlock`、`MultiCategoryBlock` 或 `RegressionBlock`。对于更高级的用法,您应使用数据块 API。
之前的微型mnist示例也包含了一个数据框版本:
path = untar_data(URLs.MNIST_TINY)
df = pd.read_csv(path/'labels.csv')
df.head()
name | label | |
---|---|---|
0 | train/3/7463.png | 3 |
1 | train/3/9829.png | 3 |
2 | train/3/7881.png | 3 |
3 | train/3/8065.png | 3 |
4 | train/3/7046.png | 3 |
以下是如何使用 `ImageDataLoaders.from_df` 加载它:
dls = ImageDataLoaders.from_df(df, path)
/home/jhoward/git/fastai/fastai/data/transforms.py:212: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
o = r[c] if isinstance(c, int) or not c in getattr(r, '_fields', []) else getattr(r, c)
这是另一个多标签问题的示例:
path = untar_data(URLs.PASCAL_2007)
df = pd.read_csv(path/'train.csv')
df.head()
fname | labels | is_valid | |
---|---|---|---|
0 | 000005.jpg | chair | True |
1 | 000007.jpg | car | True |
2 | 000009.jpg | horse person | True |
3 | 000012.jpg | car | False |
4 | 000016.jpg | bicycle | True |
dls = ImageDataLoaders.from_df(df, path, folder='train', valid_col='is_valid')
/home/jhoward/git/fastai/fastai/data/transforms.py:212: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
o = r[c] if isinstance(c, int) or not c in getattr(r, '_fields', []) else getattr(r, c)
注意,您也可以将 `2` 传递给 `valid_col`(索引,起始为 0)。
show_doc(ImageDataLoaders.from_csv)
ImageDataLoaders.from_csv
ImageDataLoaders.from_csv (path, csv_fname='labels.csv', header='infer', delimiter=None, valid_pct=0.2, seed=None, fn_col=0, folder=None, suff='', label_col=1, label_delim=None, y_block=None, valid_col=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from path/csv_fname
using fn_col
and label_col
Type | Default | Details | |
---|---|---|---|
path | str | Path | . | Path to put in DataLoaders |
csv_fname | str | labels.csv | |
header | str | infer | |
delimiter | NoneType | None | |
valid_pct | float | 0.2 | |
seed | NoneType | None | |
fn_col | int | 0 | |
folder | NoneType | None | |
suff | str | ||
label_col | int | 1 | |
label_delim | NoneType | None | |
y_block | NoneType | None | |
valid_col | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
在使用 `header` 和 `delimiter` 加载文件后,其余行为与 `ImageDataLoaders.from_df` 相同。
以下是使用此方法加载与之前相同的数据集的方式:
dls = ImageDataLoaders.from_csv(path, 'train.csv', folder='train', valid_col='is_valid')
/home/jhoward/git/fastai/fastai/data/transforms.py:212: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
o = r[c] if isinstance(c, int) or not c in getattr(r, '_fields', []) else getattr(r, c)
show_doc(ImageDataLoaders.from_lists)
ImageDataLoaders.from_lists
ImageDataLoaders.from_lists (path, fnames, labels, valid_pct=0.2, seed:int=None, y_block=None, item_tfms=None, batch_tfms=None, img_cls=<class 'fastai.vision.core.PILImage'>, bs:int=64, val_bs:int=None, shuffle:bool=True, device=None)
Create from list of fnames
and labels
in path
Type | Default | Details | |
---|---|---|---|
path | str | Path | . | Path to put in DataLoaders |
fnames | |||
labels | |||
valid_pct | float | 0.2 | |
seed | int | None | |
y_block | NoneType | None | |
item_tfms | NoneType | None | |
batch_tfms | NoneType | None | |
img_cls | BypassNewMeta | PILImage | |
bs | int | 64 | Size of batch |
val_bs | int | None | Size of batch for validation DataLoader |
shuffle | bool | True | Whether to shuffle data |
device | NoneType | None | Device to put DataLoaders |
验证集是一个随机子集,大小为 `valid_pct`,可以选择使用 `seed` 进行可重复性创建。可以传入 `y_block` 来指定目标的类型。
path = untar_data(URLs.PETS)
fnames = get_image_files(path/"images")
# The label is the file name with everything after the last `_` dropped.
labels = ['_'.join(x.name.split('_')[:-1]) for x in fnames]
dls = ImageDataLoaders.from_lists(path, fnames, labels)
Downloading a new version of this dataset...
class SegmentationDataLoaders(DataLoaders):
    "Basic wrapper around several `DataLoader`s with factory methods for segmentation problems"
    @classmethod
    @delegates(DataLoaders.from_dblock)
    def from_label_func(cls, path, fnames, label_func, valid_pct=0.2, seed=None, codes=None, item_tfms=None, batch_tfms=None,
                        img_cls=PILImage, **kwargs):
        "Create from list of `fnames` in `path`s with `label_func`."
        # Inputs are images of `img_cls`; targets come from `label_func` and go through `MaskBlock(codes=codes)`.
        dblock = DataBlock(blocks=(ImageBlock(img_cls), MaskBlock(codes=codes)),
                           splitter=RandomSplitter(valid_pct, seed=seed),
                           get_y=label_func,
                           item_tfms=item_tfms,
                           batch_tfms=batch_tfms)
        res = cls.from_dblock(dblock, fnames, path=path, **kwargs)
        return res
show_doc(SegmentationDataLoaders.from_label_func)
验证集是比例为 `valid_pct` 的一个随机子集,可以选择使用 `seed` 来保证结果可复现。`codes` 包含标签的映射索引。
path = untar_data(URLs.CAMVID_TINY)
fnames = get_image_files(path/'images')
def label_func(x): return path/'labels'/f'{x.stem}_P{x.suffix}'
codes = np.loadtxt(path/'codes.txt', dtype=str)

dls = SegmentationDataLoaders.from_label_func(path, fnames, label_func, codes=codes)
导出 -
from nbdev import nbdev_export
nbdev_export()