from nbdev.cli import *
数据核心
! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
from __future__ import annotations
from fastai.torch_basics import *
from fastai.data.load import *
from nbdev.showdoc import *
收集数据的核心功能
这里的类提供了将一系列变换应用于一组项目(`TfmdLists`、`Datasets`)或一个 `DataLoader`(`TfmdDL`)的功能,以及用于收集模型训练数据的基类:`DataLoaders`。
TfmdDL -
@typedispatch
def show_batch(
    x, # Input(s) in the batch
    y, # Target(s) in the batch
    samples, # List of (`x`, `y`) pairs of length `max_n`
    ctxs=None, # List of `ctx` objects to show data. Could be a matplotlib axis, DataFrame, etc.
    max_n=9, # Maximum number of `samples` to show
    **kwargs
):
    "Show `max_n` input(s) and target(s) from the batch."
    # With no contexts supplied, fall back to an endless stream of `None`
    # so each `show` call creates its own context.
    if ctxs is None: ctxs = Inf.nones
    if hasattr(samples[0], 'show'):
        # Each sample knows how to show itself as a whole.
        ctxs = [s.show(ctx=c, **kwargs) for s,c,_ in zip(samples,ctxs,range(max_n))]
    else:
        # Otherwise show the samples element by element, threading the contexts
        # returned for element `i` into the pass for element `i+1`.
        for i in range_of(samples[0]):
            ctxs = [b.show(ctx=c, **kwargs) for b,c,_ in zip(samples.itemgot(i),ctxs,range(max_n))]
    return ctxs
`show_batch` 是一个类型分派函数,负责显示解码后的 `samples`。`x` 和 `y` 分别是要显示的批次中的输入和目标,并根据它们的类型进行分派。例如,如果 `x` 是 `TensorImage` 或 `TensorText`,则 `show_batch` 会有不同的实现(详情请参见 vision.core 或 text.data)。可以传入 `ctxs`,但该函数负责在必要时自行创建它们。`kwargs` 取决于具体的实现。
@typedispatch
def show_results(
    x, # Input(s) in the batch
    y, # Target(s) in the batch
    samples, # List of (`x`, `y`) pairs of length `max_n`
    outs, # List of predicted output(s) from the model
    ctxs=None, # List of `ctx` objects to show data. Could be a matplotlib axis, DataFrame, etc.
    max_n=9, # Maximum number of `samples` to show
    **kwargs
):
    "Show `max_n` results with input(s), target(s) and prediction(s)."
    # With no contexts supplied, fall back to an endless stream of `None`.
    if ctxs is None: ctxs = Inf.nones
    # First draw every element of the decoded samples...
    for i in range(len(samples[0])):
        ctxs = [b.show(ctx=c, **kwargs) for b,c,_ in zip(samples.itemgot(i),ctxs,range(max_n))]
    # ...then draw every element of the predictions onto the same contexts.
    for i in range(len(outs[0])):
        ctxs = [b.show(ctx=c, **kwargs) for b,c,_ in zip(outs.itemgot(i),ctxs,range(max_n))]
    return ctxs
`show_results` 是一个类型分派函数,负责显示解码后的 `samples` 及其对应的 `outs`。与 `show_batch` 类似,`x` 和 `y` 是要显示的批次中的输入和目标,并根据它们的类型进行分派。可以传入 `ctxs`,但如果需要,函数负责自行创建它们。`kwargs` 取决于具体的实现。
= ["show_batch", "show_results"] _all_
= ('after_item','before_batch','after_batch') _batch_tfms
class TfmdDL(DataLoader):
    "Transformed `DataLoader`"
    # NOTE(review): `add_docs(TfmdDL, ..., to=...)` and the notebook tests reference a `to`
    # method that is not defined in this class body as seen here; presumably it is inherited
    # from `DataLoader` or was lost upstream -- verify.
    @delegates(DataLoader.__init__)
    def __init__(self,
        dataset, # Map- or iterable-style dataset from which to load the data
        bs:int=64, # Size of batch
        shuffle:bool=False, # Whether to shuffle data
        num_workers:int=None, # Number of CPU cores to use in parallel (default: All available up to 16)
        verbose:bool=False, # Whether to print verbose logs
        do_setup:bool=True, # Whether to run `setup()` for batch transform(s)
        **kwargs
    ):
        if num_workers is None: num_workers = min(16, defaults.cpus)
        # Wrap every batch-level callback in a `Pipeline` (an empty one when not supplied).
        for nm in _batch_tfms: kwargs[nm] = Pipeline(kwargs.get(nm,None))
        super().__init__(dataset, bs=bs, shuffle=shuffle, num_workers=num_workers, **kwargs)
        if do_setup:
            for nm in _batch_tfms:
                pv(f"Setting up {nm}: {kwargs[nm]}", verbose)
                kwargs[nm].setup(self)

    def _one_pass(self):
        "Run one item through the batch pipeline to record `_n_inp` and `_types`"
        b = self.do_batch([self.do_item(None)])
        if self.device is not None: b = to_device(b, self.device)
        its = self.after_batch(b)
        # A non-tuple or 1-tuple batch has one input; otherwise all but the last element are inputs.
        self._n_inp = 1 if not isinstance(its, (list,tuple)) or len(its)==1 else len(its)-1
        self._types = explode_types(its)

    def _retain_dl(self,b):
        "Cast `b` back to the types recorded by `_one_pass`"
        if not getattr(self, '_types', None): self._one_pass()
        return retain_types(b, typs=self._types)

    @delegates(DataLoader.new)
    def new(self,
        dataset=None, # Map- or iterable-style dataset from which to load the data
        cls=None, # Class of the newly created `DataLoader` object
        **kwargs
    ):
        res = super().new(dataset, cls, do_setup=False, **kwargs)
        if not hasattr(self, '_n_inp') or not hasattr(self, '_types'):
            try:
                self._one_pass()
                res._n_inp,res._types = self._n_inp,self._types
            except Exception:
                # Tell the user where the failure comes from, then let the original error propagate.
                print("Could not do one pass in your dataloader, there is something wrong in it. Please see the stack trace below:")
                raise
        else: res._n_inp,res._types = self._n_inp,self._types
        return res

    def before_iter(self):
        super().before_iter()
        # Propagate the dataset's `split_idx` (if any) to each batch-level pipeline.
        split_idx = getattr(self.dataset, 'split_idx', None)
        for nm in _batch_tfms:
            f = getattr(self,nm)
            if isinstance(f,Pipeline): f.split_idx=split_idx

    def decode(self,
        b # Batch to decode
    ):
        return to_cpu(self.after_batch.decode(self._retain_dl(b)))

    def decode_batch(self,
        b, # Batch to decode
        max_n:int=9, # Maximum number of items to decode
        full:bool=True # Whether to decode all transforms. If `False`, decode up to the point the item knows how to show itself
    ):
        return self._decode_batch(self.decode(b), max_n, full)

    def _decode_batch(self, b, max_n=9, full=True):
        "Split decoded batch `b` into samples and map the remaining decoders over them"
        f = self.after_item.decode
        f1 = self.before_batch.decode
        f = compose(f1, f, partial(getcallable(self.dataset,'decode'), full = full))
        return L(batch_to_samples(b, max_n=max_n)).map(f)

    def _pre_show_batch(self, b, max_n=9):
        "Decode `b` to be ready for `show_batch`"
        b = self.decode(b)
        # A batch that can show itself needs no per-sample decoding.
        if hasattr(b, 'show'): return b,None,None
        its = self._decode_batch(b, max_n, full=False)
        if not is_listy(b): b,its = [b],L((o,) for o in its)
        return detuplify(b[:self.n_inp]),detuplify(b[self.n_inp:]),its

    def show_batch(self,
        b=None, # Batch to show
        max_n:int=9, # Maximum number of items to show
        ctxs=None, # List of `ctx` objects to show data. Could be matplotlib axis, DataFrame etc
        show:bool=True, # Whether to display data
        unique:bool=False, # Whether to show only one
        **kwargs
    ):
        "Show `max_n` input(s) and target(s) from the batch."
        if unique:
            # Temporarily make the loader draw index 0 over and over.
            old_get_idxs = self.get_idxs
            self.get_idxs = lambda: Inf.zeros
        try:
            if b is None: b = self.one_batch()
            if not show: return self._pre_show_batch(b, max_n=max_n)
            # Resolves to the module-level type-dispatched `show_batch`, not this method.
            show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)
        finally:
            # Always undo the patch, including on the `show=False` early return and on errors.
            if unique: self.get_idxs = old_get_idxs

    def show_results(self,
        b, # Batch to show results for
        out, # Predicted output from model for the batch
        max_n:int=9, # Maximum number of items to show
        ctxs=None, # List of `ctx` objects to show data. Could be matplotlib axis, DataFrame etc
        show:bool=True, # Whether to display data
        **kwargs
    ):
        "Show `max_n` results with input(s), target(s) and prediction(s)."
        x,y,its = self.show_batch(b, max_n=max_n, show=False)
        # Build a batch with the same inputs but the predictions in place of the targets.
        b_out = type(b)(b[:self.n_inp] + (tuple(out) if is_listy(out) else (out,)))
        x1,y1,outs = self.show_batch(b_out, max_n=max_n, show=False)
        res = (x,x1,None,None) if its is None else (x, y, its, outs.itemgot(slice(self.n_inp,None)))
        if not show: return res
        # Resolves to the module-level type-dispatched `show_results`.
        show_results(*res, ctxs=ctxs, max_n=max_n, **kwargs)

    @property
    def n_inp(self) -> int:
        "Number of elements in `Datasets` or `TfmdDL` tuple to be considered part of input."
        if hasattr(self.dataset, 'n_inp'): return self.dataset.n_inp
        if not hasattr(self, '_n_inp'): self._one_pass()
        return self._n_inp
`TfmdDL` 是一个 `DataLoader`,它为回调 `after_item`、`before_batch` 和 `after_batch` 分别从一系列 `Transform` 创建 `Pipeline`。因此,它可以解码或显示处理后的 `batch`。
# Attach docstrings to the `TfmdDL` methods that do not carry one in their body.
add_docs(TfmdDL,
         decode="Decode `b` using `tfms`",
         decode_batch="Decode `b` entirely",
         new="Create a new version of self with a few changed attributes",
         show_batch="Show `b` (defaults to `one_batch`), a list of lists of pipeline outputs (i.e. output of a `DataLoader`)",
         show_results="Show each item of `b` and `out`",
         before_iter="override",
         to="Put self and its transforms state on `device`")
class _Category(int, ShowTitle): pass
#测试保留类型
class NegTfm(Transform):
def encodes(self, x): return torch.neg(x)
def decodes(self, x): return torch.neg(x)
= TfmdDL([(TensorImage([1]),)] * 4, after_batch=NegTfm(), bs=4, num_workers=4)
tdl = tdl.one_batch()
b type(b[0]), TensorImage)
test_eq(= (tensor([1.,1.,1.,1.]),)
b type(tdl.decode_batch(b)[0][0]), TensorImage) test_eq(
class A(Transform):
def encodes(self, x): return x
def decodes(self, x): return TitledInt(x)
@Transform
def f(x)->None: return fastuple((x,x))
= torch.arange(50)
start 2), fastuple((2,2))) test_eq_type(f(
= A()
a = TfmdDL(start, after_item=lambda x: (a(x), f(x)), bs=4)
tdl = tdl.one_batch()
x,y type(y), fastuple)
test_eq(
= tdl.decode_batch((x,y))
s type(s[0][1]), fastuple) test_eq(
= TfmdDL(torch.arange(0,50), after_item=A(), after_batch=NegTfm(), bs=4)
tdl 0], start[0])
test_eq(tdl.dataset[len(tdl), (50-1)//4+1)
test_eq(4)
test_eq(tdl.bs, '0\n1\n2\n3')
test_stdout(tdl.show_batch, =True), '0\n0\n0\n0') test_stdout(partial(tdl.show_batch, unique
class B(Transform):
= 'a'
parameters def __init__(self): self.a = torch.tensor(0.)
def encodes(self, x): x
= TfmdDL([(TensorImage([1]),)] * 4, after_batch=B(), bs=4)
tdl 0].a.device, torch.device('cpu'))
test_eq(tdl.after_batch.fs[
tdl.to(default_device())0].a.device, default_device()) test_eq(tdl.after_batch.fs[
方法
show_doc(TfmdDL.one_batch)
= NegTfm()
tfm = TfmdDL(start, after_batch=tfm, bs=4) tdl
= tdl.one_batch()
b 0,-1,-2,-3]), b) test_eq(tensor([
show_doc(TfmdDL.decode)
0,1,2,3)) test_eq(tdl.decode(b), tensor(
show_doc(TfmdDL.decode_batch)
0,1,2,3]) test_eq(tdl.decode_batch(b), [
show_doc(TfmdDL.show_batch)
TfmdDL.show_batch
[source]
TfmdDL.show_batch
(b
=None
,max_n
=9
,ctxs
=None
,show
=True
,unique
=False
, **kwargs
)
Show b
(defaults to one_batch
), a list of lists of pipeline outputs (i.e. output of a DataLoader
)
show_doc(TfmdDL.to)
数据加载器 -
@docs
class DataLoaders(GetAttr):
    "Basic wrapper around several `DataLoader`s."
    # `GetAttr` forwards unknown attribute lookups to `self.train`.
    _default='train'
    def __init__(self,
        *loaders, # `DataLoader` objects to wrap
        path:str|Path='.', # Path to store export objects
        device=None # Device to put `DataLoaders`
    ):
        self.loaders,self.path = list(loaders),Path(path)
        # Only move the loaders when a device is given and they support `to`.
        if device is not None and (loaders!=() and hasattr(loaders[0],'to')): self.device = device

    def __getitem__(self, i): return self.loaders[i]
    def __len__(self): return len(self.loaders)
    def new_empty(self):
        loaders = [dl.new(dl.dataset.new_empty()) for dl in self.loaders]
        return type(self)(*loaders, path=self.path, device=self.device)

    # `_set` is consumed by `add_props` at class-creation time, hence the index-first signature.
    def _set(i, self, v): self.loaders[i] = v
    train   ,valid    = add_props(lambda i,x: x[i], _set)
    train_ds,valid_ds = add_props(lambda i,x: x[i].dataset)

    @property
    def device(self): return self._device

    @device.setter
    def device(self,
        d # Device to put `DataLoaders`
    ):
        for dl in self.loaders: dl.to(d)
        self._device = d

    def to(self,
        device # Device to put `DataLoaders`
    ):
        self.device = device
        return self

    def _add_tfms(self, tfms, event, dl_idx):
        "Adds `tfms` to `event` on `dl`"
        # A string index selects by name: 'train' -> 0, anything else -> 1.
        if(isinstance(dl_idx,str)): dl_idx = 0 if(dl_idx=='train') else 1
        dl_tfms = getattr(self[dl_idx], event)
        apply(dl_tfms.add, tfms)

    def add_tfms(self,
        tfms, # List of `Transform`(s) or `Pipeline` to apply
        event, # When to run `Transform`. Events mentioned in `TfmdDL`
        loaders=None # List of `DataLoader` objects to add `tfms` to
    ):
        "Adds `tfms` to `events` on `loaders`"
        # Default to every wrapped loader.
        if(loaders is None): loaders=range(len(self.loaders))
        if not is_listy(loaders): loaders = listify(loaders)
        for loader in loaders:
            self._add_tfms(tfms,event,loader)

    def cuda(self): return self.to(device=default_device())
    def cpu(self):  return self.to(device=torch.device('cpu'))

    @classmethod
    def from_dsets(cls,
        *ds, # `Datasets` object(s)
        path:str|Path='.', # Path to put in `DataLoaders`
        bs:int=64, # Size of batch
        device=None, # Device to put `DataLoaders`
        dl_type=TfmdDL, # Type of `DataLoader`
        **kwargs
    ):
        # Only the first loader shuffles and drops its last batch by default.
        default = (True,) + (False,) * (len(ds)-1)
        defaults = {'shuffle': default, 'drop_last': default}
        # Each dataset gets its own `Pipeline` instance for every batch-level callback passed in.
        tfms = {k:tuple(Pipeline(kwargs[k]) for i in range_of(ds)) for k in _batch_tfms if k in kwargs}
        # Broadcast every remaining kwarg to one value per dataset...
        kwargs = merge(defaults, {k: tuplify(v, match=ds) for k,v in kwargs.items() if k not in _batch_tfms}, tfms)
        # ...then transpose into one kwargs dict per dataset.
        kwargs = [{k: v[i] for k,v in kwargs.items()} for i in range_of(ds)]
        return cls(*[dl_type(d, bs=bs, **k) for d,k in zip(ds, kwargs)], path=path, device=device)

    @classmethod
    def from_dblock(cls,
        dblock, # `DataBlock` object
        source, # Source of data. Can be `Path` to files
        path:str|Path='.', # Path to put in `DataLoaders`
        bs:int=64, # Size of batch
        val_bs:int=None, # Size of batch for validation `DataLoader`
        shuffle:bool=True, # Whether to shuffle data
        device=None, # Device to put in `DataLoaders`
        **kwargs
    ):
        return dblock.dataloaders(source, path=path, bs=bs, val_bs=val_bs, shuffle=shuffle, device=device, **kwargs)

    _docs=dict(__getitem__="Retrieve `DataLoader` at `i` (`0` is training, `1` is validation)",
               train="Training `DataLoader`",
               valid="Validation `DataLoader`",
               train_ds="Training `Dataset`",
               valid_ds="Validation `Dataset`",
               to="Use `device`",
               add_tfms="Add `tfms` to `loaders` for `event",
               cuda="Use accelerator if available",
               cpu="Use the cpu",
               new_empty="Create a new empty version of `self` with the same transforms",
               from_dblock="Create a dataloaders from a given `dblock`")
= DataLoaders(tdl,tdl)
dls = dls.train.one_batch()
x = first(tdl)
x2
test_eq(x,x2)= dls.one_batch()
x2 test_eq(x,x2)
#测试分配有效
= dls.train.new(bs=4) dls.train
可以通过 `DataLoaders.add_tfms` 向多个数据加载器添加多个变换。您可以通过名称列表指定数据加载器,例如 `dls.add_tfms(...,'valid',...)`,或通过索引 `dls.add_tfms(...,1,...)`。默认情况下,变换会添加到所有数据加载器中。`event` 是一个必需的参数,用于确定变换何时运行,有关事件的更多信息,请参考 `TfmdDL`。`tfms` 是一个 `Transform` 列表,也是必需的参数。
class _TestTfm(Transform):
def encodes(self, o): return torch.ones_like(o)
def decodes(self, o): return o
= TfmdDL(start, bs=4),TfmdDL(start, bs=4)
tdl1,tdl2 = DataLoaders(tdl1,tdl2)
dls2 'after_batch',['valid'])
dls2.add_tfms([_TestTfm()],'after_batch',[1])
dls2.add_tfms([_TestTfm()], dls2.train.after_batch,dls2.valid.after_batch,
(Pipeline: , Pipeline: _TestTfm -> _TestTfm)
len(dls2.train.after_batch.fs),0)
test_eq(len(dls2.valid.after_batch.fs),2)
test_eq(next(iter(dls2.valid)),tensor([1,1,1,1])) test_eq(
class _T(Transform):
def encodes(self, o): return -o
class _T2(Transform):
def encodes(self, o): return o/2
#测试变换应用于训练和验证数据加载器
= DataLoaders.from_dsets([1,], [5,], bs=1, after_item=_T, after_batch=_T2)
dls_from_ds = first(dls_from_ds.train)
b -.5]))
test_eq(b, tensor([= first(dls_from_ds.valid)
b -2.5])) test_eq(b, tensor([
方法
__getitem__) show_doc(DataLoaders.
DataLoaders.__getitem__
[source]
DataLoaders.__getitem__
(i
)
Retrieve DataLoader
at i
(0
is training, 1
is validation)
x2
tensor([ 0, -1, -2, -3])
= dls[0].one_batch()
x2 test_eq(x,x2)
="DataLoaders.train") show_doc(DataLoaders.train, name
DataLoaders.train
[source]
Training DataLoader
="DataLoaders.valid") show_doc(DataLoaders.valid, name
DataLoaders.valid
[source]
Validation DataLoader
="DataLoaders.train_ds") show_doc(DataLoaders.train_ds, name
DataLoaders.train_ds
[source]
Training Dataset
="DataLoaders.valid_ds") show_doc(DataLoaders.valid_ds, name
DataLoaders.valid_ds
[source]
Validation Dataset
TfmdLists -
class FilteredBase:
    "Base class for lists with subsets"
    _dl_type,_dbunch_type = TfmdDL,DataLoaders
    def __init__(self, *args, dl_type=None, **kwargs):
        if dl_type is not None: self._dl_type = dl_type
        # Expose the chosen loader type's `__init__` kwargs on this instance's `dataloaders` signature.
        self.dataloaders = delegates(self._dl_type.__init__)(self.dataloaders)
        super().__init__(*args, **kwargs)

    @property
    def n_subsets(self): return len(self.splits)
    def _new(self, items, **kwargs): return super()._new(items, splits=self.splits, **kwargs)
    # Subclasses must override; `NotImplemented` is not an exception and raising it gives a `TypeError`.
    def subset(self): raise NotImplementedError

    def dataloaders(self,
        bs:int=64, # Batch size
        shuffle_train:bool=None, # (Deprecated, use `shuffle`) Shuffle training `DataLoader`
        shuffle:bool=True, # Shuffle training `DataLoader`
        val_shuffle:bool=False, # Shuffle validation `DataLoader`
        n:int=None, # Size of `Datasets` used to create `DataLoader`
        path:str|Path='.', # Path to put in `DataLoaders`
        dl_type:TfmdDL=None, # Type of `DataLoader`
        dl_kwargs:list=None, # List of kwargs to pass to individual `DataLoader`s
        device:torch.device=None, # Device to put `DataLoaders`
        drop_last:bool=None, # Drop last incomplete batch, defaults to `shuffle`
        val_bs:int=None, # Validation batch size, defaults to `bs`
        **kwargs
    ) -> DataLoaders:
        if shuffle_train is not None:
            shuffle=shuffle_train
            warnings.warn('`shuffle_train` is deprecated. Use `shuffle` instead.',DeprecationWarning)
        if device is None: device=default_device()
        if dl_kwargs is None: dl_kwargs = [{}] * self.n_subsets
        if dl_type is None: dl_type = self._dl_type
        if drop_last is None: drop_last = shuffle
        # kwargs prefixed with `val_` apply (with the prefix stripped) to the non-training loaders.
        val_kwargs={k[4:]:v for k,v in kwargs.items() if k.startswith('val_')}
        def_kwargs = {'bs':bs,'shuffle':shuffle,'drop_last':drop_last,'n':n,'device':device}
        dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
        # Remaining subsets are derived from the first loader with validation defaults.
        def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
        dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
                      for i in range(1, self.n_subsets)]
        return self._dbunch_type(*dls, path=path, device=device)

# `train` / `valid` are shortcuts for `subset(0)` / `subset(1)`.
FilteredBase.train,FilteredBase.valid = add_props(lambda i,x: x.subset(i))
show_doc(FilteredBase().dataloaders)
FilteredBase.dataloaders
[source]
FilteredBase.dataloaders
(bs
=64
,shuffle_train
=None
,shuffle
=True
,val_shuffle
=False
,n
=None
,path
='.'
,dl_type
=None
,dl_kwargs
=None
,device
=None
,drop_last
=None
,val_bs
=None
,num_workers
=None
,verbose
=False
,do_setup
=True
,pin_memory
=False
,timeout
=0
,batch_size
=None
,indexed
=None
,persistent_workers
=False
,wif
=None
,before_iter
=None
,after_item
=None
,before_batch
=None
,after_batch
=None
,after_iter
=None
,create_batches
=None
,create_item
=None
,create_batch
=None
,retain
=None
,get_idxs
=None
,sample
=None
,shuffle_fn
=None
,do_batch
=None
)
class TfmdLists(FilteredBase, L, GetAttr):
    "A `Pipeline` of `tfms` applied to a collection of `items`"
    # `GetAttr` forwards unknown attribute lookups to `self.tfms`.
    _default='tfms'
    def __init__(self,
        items:list, # Items to apply `Transform`s to
        tfms:MutableSequence|Pipeline, # `Transform`(s) or `Pipeline` to apply
        use_list:bool=None, # Use `list` in `L`
        do_setup:bool=True, # Call `setup()` for `Transform`
        split_idx:int=None, # Apply `Transform`(s) to training or validation set. `0` for training set and `1` for validation set
        train_setup:bool=True, # Apply `Transform`(s) only on training `DataLoader`
        splits:list=None, # Indices for training and validation sets
        types=None, # Types of data in `items`
        verbose:bool=False, # Print verbose output
        dl_type:TfmdDL=None # Type of `DataLoader`
    ):
        super().__init__(items, use_list=use_list)
        if dl_type is not None: self._dl_type = dl_type
        # Without explicit splits, everything is in subset 0 and subset 1 is empty.
        self.splits = L([slice(None),[]] if splits is None else splits).map(mask2idxs)
        if isinstance(tfms,TfmdLists): tfms = tfms.tfms
        # A ready-made `Pipeline` is not set up again.
        if isinstance(tfms,Pipeline): do_setup=False
        self.tfms = Pipeline(tfms, split_idx=split_idx)
        store_attr('types,split_idx')
        if do_setup:
            pv(f"Setting up {self.tfms}", verbose)
            self.setup(train_setup=train_setup)

    def _new(self, items, split_idx=None, **kwargs):
        "Create a `TfmdLists` over `items` sharing this object's transforms and types"
        split_idx = ifnone(split_idx,self.split_idx)
        try: return super()._new(items, tfms=self.tfms, do_setup=False, types=self.types, split_idx=split_idx, **kwargs)
        except IndexError as e:
            # Report the requested subset; the original message referenced an undefined `i`.
            e.args = [f"Tried to grab subset {split_idx} in the Dataset, but it contained no items.\n\t{e.args[0]}"]
            raise
    def subset(self, i): return self._new(self._get(self.splits[i]), split_idx=i)
    def _after_item(self, o): return self.tfms(o)
    def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
    def __iter__(self): return (self[i] for i in range(len(self)))
    def show(self, o, **kwargs): return self.tfms.show(o, **kwargs)
    def decode(self, o, **kwargs): return self.tfms.decode(o, **kwargs)
    def __call__(self, o, **kwargs): return self.tfms.__call__(o, **kwargs)
    def overlapping_splits(self): return L(Counter(self.splits.concat()).values()).filter(gt(1))
    def new_empty(self): return self._new([])

    def setup(self,
        train_setup:bool=True # Apply `Transform`(s) only on training `DataLoader`
    ):
        self.tfms.setup(self, train_setup)
        if len(self) != 0:
            # Push one raw item through the pipeline, recording the input type at every stage.
            x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]
            self.types = []
            for f in self.tfms.fs:
                self.types.append(getattr(f, 'input_types', type(x)))
                x = f(x)
            self.types.append(type(x))
        types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()
        self.pretty_types = '\n'.join([f' - {t}' for t in types])

    def infer_idx(self, x):
        # TODO: check if we really need this, or can simplify
        # Index of the first recorded stage type that `x` is an instance of.
        idx = 0
        for t in self.types:
            if isinstance(x, t): break
            idx += 1
        types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()
        pretty_types = '\n'.join([f' - {t}' for t in types])
        assert idx < len(self.types), f"Expected an input of type in \n{pretty_types}\n but got {type(x)}"
        return idx

    def infer(self, x):
        return compose_tfms(x, tfms=self.tfms.fs[self.infer_idx(x):], split_idx=self.split_idx)

    def __getitem__(self, idx):
        res = super().__getitem__(idx)
        if self._after_item is None: return res
        # A single index transforms one item; anything else maps over the returned collection.
        return self._after_item(res) if is_indexer(idx) else res.map(self._after_item)
# Attach docstrings to the `TfmdLists` methods that do not carry one in their body.
add_docs(TfmdLists,
         setup="Transform setup with self",
         decode="From `Pipeline`",
         show="From `Pipeline`",
         overlapping_splits="All splits that are in more than one split",
         subset="New `TfmdLists` with same tfms that only includes items in `i`th split",
         infer_idx="Finds the index where `self.tfms` can be applied to `x`, depending on the type of `x`",
         infer="Apply `self.tfms` to `x` starting at the right tfm depending on the type of `x`",
         new_empty="A new version of `self` but with no items")
::: {#cell-55 .cell 0=‘输’ 1=‘出’}
def decode_at(o, idx):
    "Decoded item at `idx`"
    # Index first (which yields the encoded item), then ask `o` to decode it.
    return o.decode(o[idx])
:::
::: {#cell-56 .cell 0=‘输’ 1=‘出’}
def show_at(o, idx, **kwargs):
    "Show item at `idx`"
    # Index first (which yields the encoded item), then let `o` show it; `kwargs` go to `o.show`.
    return o.show(o[idx], **kwargs)
:::
`TfmdLists` 将一组对象与一个 `Pipeline` 结合在一起。`tfms` 可以是一个 `Pipeline`,也可以是一个转换列表;在后一种情况下,它会被包装成一个 `Pipeline`。`use_list` 与 `items` 一起传递给 `L`,而 `split_idx` 则传递给 `Pipeline` 的每个转换。`do_setup` 指示在初始化期间是否应该调用 `Pipeline.setup` 方法。
class _IntFloatTfm(Transform):
def encodes(self, o): return TitledInt(o)
def decodes(self, o): return TitledFloat(o)
=_IntFloatTfm()
int2f_tfm
def _neg(o): return -o
= Transform(_neg, _neg) neg_tfm
= L([1.,2.,3.]); tfms = [neg_tfm, int2f_tfm]
items = TfmdLists(items, tfms=tfms)
tl 0], TitledInt(-1))
test_eq_type(tl[1], TitledInt(-2))
test_eq_type(tl[2]), TitledFloat(3.))
test_eq_type(tl.decode(tl[lambda: show_at(tl, 2), '-3')
test_stdout(float, float, TitledInt])
test_eq(tl.types, [ tl
TfmdLists: [1.0, 2.0, 3.0]
tfms - [_neg:
encodes: (object,object) -> _negdecodes: (object,object) -> _neg, _IntFloatTfm:
encodes: (object,object) -> encodes
decodes: (object,object) -> decodes
]
# 向 TfmdLists 添加拆分
= [[0,2],[1]]
splits = TfmdLists(items, tfms=tfms, splits=splits)
tl 2)
test_eq(tl.n_subsets, 0))
test_eq(tl.train, tl.subset(1))
test_eq(tl.valid, tl.subset(0]])
test_eq(tl.train.items, items[splits[1]])
test_eq(tl.valid.items, items[splits[0)
test_eq(tl.train.tfms.split_idx, 1)
test_eq(tl.valid.tfms.split_idx, 0)
test_eq(tl.train.new_empty().split_idx, 1)
test_eq(tl.valid.new_empty().split_idx,
test_eq_type(tl.splits, L(splits))assert not tl.overlapping_splits()
= pd.DataFrame(dict(a=[1,2,3],b=[2,3,4]))
df = TfmdLists(df, lambda o: o.a+1, splits=[[0],[1,2]])
tl 1,2], [3,4])
test_eq(tl[= tl.subset(0)
tr 2])
test_eq(tr[:], [= tl.subset(1)
val 3,4]) test_eq(val[:], [
class _B(Transform):
def __init__(self): self.m = 0
def encodes(self, o): return o+self.m
def decodes(self, o): return o-self.m
def setups(self, items):
print(items)
self.m = tensor(items).float().mean().item()
# 测试设置,更新 `self.m`
= TfmdLists(items, _B())
tl 2) test_eq(tl.m,
TfmdLists: [1.0, 2.0, 3.0]
tfms - []
下面展示如何使用 `TfmdLists.setup` 来实现一个简单的类别列表,从一个模拟的文件名列表中获取标签:
class _Cat(Transform):
= 1
order def encodes(self, o): return int(self.o2i[o])
def decodes(self, o): return TitledStr(self.vocab[o])
def setups(self, items): self.vocab,self.o2i = uniqueify(L(items), sort=True, bidir=True)
= _Cat()
tcat
def _lbl(o): return TitledStr(o.split('_')[0])
# 确保变换按 `order` 排序,并且首先调用 `_lbl`。
= ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg']
fns = TfmdLists(fns, [tcat,_lbl])
tl = ['cat','dog']
exp_voc
test_eq(tcat.vocab, exp_voc)
test_eq(tl.tfms.vocab, exp_voc)
test_eq(tl.vocab, exp_voc)1,0,0,0,1))
test_eq(tl, (for o in tl], ('dog','cat','cat','cat','dog')) test_eq([tl.decode(o)
#仅考虑训练集进行设置
= TfmdLists(fns, [tcat,_lbl], splits=[[0,4], [1,2,3]])
tl 'dog']) test_eq(tcat.vocab, [
= NegTfm(split_idx=1)
tfm = TfmdLists(start, A())
tds = TfmdDL(tds, after_batch=tfm, bs=4)
tdl = tdl.one_batch()
x 4))
test_eq(x, torch.arange(= 1
tds.split_idx = tdl.one_batch()
x -torch.arange(4))
test_eq(x, = 0
tds.split_idx = tdl.one_batch()
x 4)) test_eq(x, torch.arange(
= TfmdLists(start, A())
tds = TfmdDL(tds, after_batch=NegTfm(), bs=4)
tdl 0], start[0])
test_eq(tdl.dataset[len(tdl), (len(tds)-1)//4+1)
test_eq(4)
test_eq(tdl.bs, '0\n1\n2\n3') test_stdout(tdl.show_batch,
show_doc(TfmdLists.subset)
show_doc(TfmdLists.infer_idx)
TfmdLists.infer_idx
[source]
TfmdLists.infer_idx
(x
)
Finds the index where self.tfms
can be applied to x
, depending on the type of x
show_doc(TfmdLists.infer)
TfmdLists.infer
[source]
TfmdLists.infer
(x
)
Apply self.tfms
to x
starting at the right tfm depending on the type of x
def mult(x): return x*2
= 2
mult.order
= ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg']
fns = TfmdLists(fns, [_lbl,_Cat(),mult])
tl
'dog_45.jpg'), 0)
test_eq(tl.infer_idx('dog_45.jpg'), 2)
test_eq(tl.infer(
4), 2)
test_eq(tl.infer_idx(4), 8)
test_eq(tl.infer(
lambda: tl.infer_idx(2.0))
test_fail(lambda: tl.infer(2.0)) test_fail(
#测试Transform上的input_types功能
= _Cat()
cat = (str, float)
cat.input_types = TfmdLists(fns, [_lbl,cat,mult])
tl 2.0), 1)
test_eq(tl.infer_idx(
#测试函数上的类型注释是否有效
def mult(x:int|float): return x*2
= 2
mult.order = TfmdLists(fns, [_lbl,_Cat(),mult])
tl 2.0), 2) test_eq(tl.infer_idx(
数据集 -
@docs
@delegates(TfmdLists)
class Datasets(FilteredBase):
    "A dataset that creates a tuple from each `tfms`"
    def __init__(self,
        items:list=None, # List of items to create `Datasets`
        tfms:MutableSequence|Pipeline=None, # List of `Transform`(s) or `Pipeline` to apply
        tls:TfmdLists=None, # If None, `self.tls` is generated from `items` and `tfms`
        n_inp:int=None, # Number of elements in `Datasets` tuple that should be considered part of input
        dl_type=None, # Default type of `DataLoader` used when function `FilteredBase.dataloaders` is called
        **kwargs
    ):
        super().__init__(dl_type=dl_type)
        # One `TfmdLists` per entry of `tfms`, all built over the same `items`.
        self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
        # By default every element but the last is an input (at least one).
        self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))

    def __getitem__(self, it):
        res = tuple([tl[it] for tl in self.tls])
        # A single index gives one tuple; otherwise transpose into a list of tuples.
        return res if is_indexer(it) else list(zip(*res))

    # Unknown attributes are looked up on the underlying `tls`.
    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp)
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    def overlapping_splits(self): return self.tls[0].overlapping_splits()
    def new_empty(self): return type(self)(tls=[tl.new_empty() for tl in self.tls], n_inp=self.n_inp)
    @property
    def splits(self): return self.tls[0].splits
    @property
    def split_idx(self): return self.tls[0].tfms.split_idx
    @property
    def items(self): return self.tls[0].items
    @items.setter
    def items(self, v):
        for tl in self.tls: tl.items = v
    def show(self, o, ctx=None, **kwargs):
        # Each element is shown on the context returned by the previous one.
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    @contextmanager
    def set_split_idx(self, i):
        old_split_idx = self.split_idx
        for tl in self.tls: tl.tfms.split_idx = i
        try: yield self
        finally:
            # Restore the previous `split_idx` even if the body raised.
            for tl in self.tls: tl.tfms.split_idx = old_split_idx

    _docs=dict(
        decode="Compose `decode` of all `tuple_tfms` then all `tfms` on `i`",
        show="Show item `o` in `ctx`",
        dataloaders="Get a `DataLoaders`",
        overlapping_splits="All splits that are in more than one split",
        subset="New `Datasets` that only includes subset `i`",
        new_empty="Create a new empty version of the `self`, keeping only the transforms",
        set_split_idx="Contextmanager to use the same `Datasets` with another `split_idx`"
    )
一个 `Datasets` 通过对 `items`(通常是输入和目标)应用 `tfms` 中的每个 `Transform`(或 `Pipeline`)来创建一个元组。请注意,如果 `tfms` 仅包含一组 `tfms`,则 `Datasets` 给出的项目将是只有一个元素的元组。

`n_inp` 是元组中应视为输入部分的元素数量;如果 `tfms` 只由一组转换组成,则默认为 1,否则默认为 `len(tfms)-1`。在大多数情况下,`Datasets` 输出的元组中的元素数量为 2(输入,目标),但有时也可能为 3(例如在孪生网络或表格数据中),在这种情况下,我们需要能够确定输入在哪里结束、目标从哪里开始。
= [1,2,3,4]
items = Datasets(items, [[neg_tfm,int2f_tfm], [add(1)]])
dsets = dsets[0]
t -1,2))
test_eq(t, (0,1,2], [(-1,2),(-2,3),(-3,4)])
test_eq(dsets[1)
test_eq(dsets.n_inp, dsets.decode(t)
(1.0, 2)
class Norm(Transform):
def encodes(self, o): return (o-self.m)/self.s
def decodes(self, o): return (o*self.s)+self.m
def setups(self, items):
= tensor(items).float()
its self.m,self.s = its.mean(),its.std()
= [1,2,3,4]
items = Norm()
nrm = Datasets(items, [[neg_tfm,int2f_tfm], [neg_tfm,nrm]])
dsets
= zip(*dsets)
x,y 0)
test_close(tensor(y).mean(), 1)
test_close(tensor(y).std(), -1,-2,-3,-4,))
test_eq(x, (-2.5)
test_eq(nrm.m, lambda:show_at(dsets, 1), '-2')
test_stdout(
test_eq(dsets.m, nrm.m)
test_eq(dsets.norm.m, nrm.m) test_eq(dsets.train.norm.m, nrm.m)
#检查过滤器是否正确应用
class B(Transform):
def encodes(self, x)->None: return int(x+1)
def decodes(self, x): return TitledInt(x-1)
= B(split_idx=1)
add1
= Datasets(items, [neg_tfm, [neg_tfm,int2f_tfm,add1]], splits=[[3],[0,1,2]])
dsets 1], [-2,-2])
test_eq(dsets[1], [-2,-1])
test_eq(dsets.valid[1,1]], [[-2,-1], [-2,-1]])
test_eq(dsets.valid[[0], [-4,-4]) test_eq(dsets.train[
= ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','kid_1.jpg']
test_fns = _Cat()
tcat = Datasets(test_fns, [[tcat,_lbl]], splits=[[0,1,2], [3,4]])
dsets 'cat','dog'])
test_eq(tcat.vocab, [1,),(0,),(0,)])
test_eq(dsets.train, [(0], (0,))
test_eq(dsets.valid[lambda: show_at(dsets.train, 0), "dog") test_stdout(
= [0,1,2,3,4]
inp = Datasets(inp, tfms=[None])
dsets
*dsets[2], 2) # 检索一个项目(默认选择子集0)
test_eq(1,2], [(1,),(2,)]) # 按索引检索两个项目
test_eq(dsets[= [True,False,False,True,False]
mask 0,),(3,)]) # 通过掩码检索两个项目 test_eq(dsets[mask], [(
= pd.DataFrame(dict(a=[5,1,2,3,4]))
inp = Datasets(inp, tfms=attrgetter('a')).subset(0)
dsets *dsets[2], 2) # 检索一个项目(默认子集为0)
test_eq(1,2], [(1,),(2,)]) # 按索引检索两个项目
test_eq(dsets[= [True,False,False,True,False]
mask 5,),(3,)]) # 通过掩码检索两个项目 test_eq(dsets[mask], [(
#测试 n_inp
= [0,1,2,3,4]
inp = Datasets(inp, tfms=[None])
dsets 1)
test_eq(dsets.n_inp, = Datasets(inp, tfms=[[None],[None],[None]])
dsets 2)
test_eq(dsets.n_inp, = Datasets(inp, tfms=[[None],[None],[None]], n_inp=1)
dsets 1) test_eq(dsets.n_inp,
# 分割可以是索引
= Datasets(range(5), tfms=[None], splits=[tensor([0,2]), [1,3,4]])
dsets
0), [(0,),(2,)])
test_eq(dsets.subset(0,),(2,)]) # 子集0被别名为`train`
test_eq(dsets.train, [(1), [(1,),(3,),(4,)])
test_eq(dsets.subset(1,),(3,),(4,)]) # 子集1被别名为`valid`
test_eq(dsets.valid, [(*dsets.valid[2], 4)
test_eq(#assert '[(1,),(3,),(4,)]' in str(dsets) and '[(0,),(2,)]' in str(dsets)
dsets
(#5) [(0,),(1,),(2,),(3,),(4,)]
# 分割可以是布尔掩码(它们不必覆盖所有项目,但必须互不相交)
= [[False,True,True,False,True], [True,False,False,False,False]]
splits = Datasets(range(5), tfms=[None], splits=splits)
dsets
1,),(2,),(4,)])
test_eq(dsets.train, [(0,)]) test_eq(dsets.valid, [(
# 对所有项目应用变换
= [[lambda x: x*2,lambda x: x+1]]
tfm = [[1,2],[0,3,4]]
splits = Datasets(range(5), tfm, splits=splits)
dsets 3,),(5,)])
test_eq(dsets.train,[(1,),(7,),(9,)])
test_eq(dsets.valid,[(False,True], [(5,)]) test_eq(dsets.train[
# 仅转换子集1
class _Tfm(Transform):
=1
split_idxdef encodes(self, x): return x*2
def decodes(self, x): return TitledStr(x//2)
= Datasets(range(5), [_Tfm()], splits=[[1,2],[0,3,4]])
dsets 1,),(2,)])
test_eq(dsets.train,[(0,),(6,),(8,)])
test_eq(dsets.valid,[(False,True], [(2,)])
test_eq(dsets.train[ dsets
(#5) [(0,),(1,),(2,),(3,),(4,)]
#一个上下文管理器,用于更改split_idx并在训练集上应用验证转换
= dsets.train
ds with ds.set_split_idx(1):
2,),(4,)])
test_eq(ds,[(1,),(2,)]) test_eq(dsets.train,[(
#测试数据集的pickle文件
= pickle.loads(pickle.dumps(dsets))
dsrc1
test_eq(dsets.train, dsrc1.train) test_eq(dsets.valid, dsrc1.valid)
= Datasets(range(5), [_Tfm(),noop], splits=[[1,2],[0,3,4]])
dsets 1,1),(2,2)])
test_eq(dsets.train,[(0,0),(6,3),(8,4)]) test_eq(dsets.valid,[(
= torch.arange(0,50)
start = Datasets(start, [A()])
tds = TfmdDL(tds, after_item=NegTfm(), bs=4)
tdl = tdl.one_batch()
b 0,),(1,),(2,),(3,)))
test_eq(tdl.decode_batch(b), (("0\n1\n2\n3") test_stdout(tdl.show_batch,
# 仅转换子集1
class _Tfm(Transform):
=1
split_idxdef encodes(self, x): return x*2
= Datasets(range(8), [None], splits=[[1,2,5,7],[0,3,4,6]]) dsets
# 仅转换子集1
class _Tfm(Transform):
=1
split_idxdef encodes(self, x): return x*2
= Datasets(range(8), [None], splits=[[1,2,5,7],[0,3,4,6]])
dsets = dsets.dataloaders(bs=4, after_batch=_Tfm(), shuffle=False, device=torch.device('cpu'))
dls 1,2,5, 7]),)])
test_eq(dls.train, [(tensor([0,6,8,12]),)])
test_eq(dls.valid, [(tensor([1) test_eq(dls.n_inp,
方法
= [1,2,3,4]
items = Datasets(items, [[neg_tfm,int2f_tfm]]) dsets
= Datasets([1,2])
_dsrc ="Datasets.dataloaders") show_doc(_dsrc.dataloaders, name
Datasets.dataloaders
[source]
Datasets.dataloaders
(bs
=64
,shuffle_train
=None
,shuffle
=True
,val_shuffle
=False
,n
=None
,path
='.'
,dl_type
=None
,dl_kwargs
=None
,device
=None
,drop_last
=None
,val_bs
=None
,num_workers
=None
,verbose
=False
,do_setup
=True
,pin_memory
=False
,timeout
=0
,batch_size
=None
,indexed
=None
,persistent_workers
=False
,wif
=None
,before_iter
=None
,after_item
=None
,before_batch
=None
,after_batch
=None
,after_iter
=None
,create_batches
=None
,create_item
=None
,create_batch
=None
,retain
=None
,get_idxs
=None
,sample
=None
,shuffle_fn
=None
,do_batch
=None
)
Get a DataLoaders
用于创建数据加载器。您可以在参数名前加上 `val_` 前缀(例如 `val_shuffle`),以覆盖验证集的对应设置。如果您需要处理多个数据加载器,`dl_kwargs` 可以为每个数据加载器提供更细粒度的控制。
show_doc(Datasets.decode)
Datasets.decode
[source]
Datasets.decode
(o
,full
=True
)
Compose decode
of all tuple_tfms
then all tfms
on i
*dsets[0], -1)
test_eq(*dsets.decode((-1,)), 1) test_eq(
show_doc(Datasets.show)
lambda:dsets.show(dsets[1]), '-2') test_stdout(
show_doc(Datasets.new_empty)
Datasets.new_empty
[source]
Datasets.new_empty
()
Create a new empty version of the self
, keeping only the transforms
items = [1,2,3,4]
nrm = Norm()
dsets = Datasets(items, [[neg_tfm,int2f_tfm], [neg_tfm]])
# The empty copy must hold no items
empty = dsets.new_empty()
test_eq(empty.items, [])

# Test that it also works for DataFrames
df = pd.DataFrame({'a':[1,2,3,4,5], 'b':[6,7,8,9,10]})
dsets = Datasets(df, [[attrgetter('a')], [attrgetter('b')]])
empty = dsets.new_empty()
添加用于推断的测试集
# Only transform one subset: `_Tfm1` carries `split_idx=0`, while `_Tfm` (defined earlier) carries `split_idx=1`
class _Tfm1(Transform):
    "Test transform that triples its input; `split_idx=0` restricts it to subset 0"
    split_idx=0
    def encodes(self, x): return x*3

dsets = Datasets(range(8), [[_Tfm(),_Tfm1()]], splits=[[1,2,5,7],[0,3,4,6]])
# Train items are tripled (only `_Tfm1` applies), valid items are doubled (only `_Tfm` applies)
test_eq(dsets.train, [(3,),(6,),(15,),(21,)])
test_eq(dsets.valid, [(0,),(6,),(8,),(12,)])
def test_set(
    dsets:Datasets|TfmdLists, # Map- or iterable-style dataset from which to load the data
    test_items, # Items in the test dataset
    rm_tfms=None, # Start index of the `Transform`(s) from the validation set in `dsets` to apply
    with_labels:bool=False # Whether the test items contain labels
):
    "Create a test set from `test_items` using validation transforms of `dsets`"
    if isinstance(dsets, Datasets):
        # Without labels only the first `n_inp` transform lists (the inputs) are kept
        tls = dsets.tls if with_labels else dsets.tls[:dsets.n_inp]
        # Rebuild each list around the test items with `split_idx=1`
        test_tls = [tl._new(test_items, split_idx=1) for tl in tls]
        # No explicit start index: ask each list to infer one from the first test item
        if rm_tfms is None: rm_tfms = [tl.infer_idx(get_first(test_items)) for tl in test_tls]
        else: rm_tfms = tuplify(rm_tfms, match=test_tls)
        # Drop the first `j` transforms of each pipeline
        for i,j in enumerate(rm_tfms): test_tls[i].tfms.fs = test_tls[i].tfms.fs[j:]
        return Datasets(tls=test_tls)
    elif isinstance(dsets, TfmdLists):
        test_tl = dsets._new(test_items, split_idx=1)
        if rm_tfms is None: rm_tfms = dsets.infer_idx(get_first(test_items))
        test_tl.tfms.fs = test_tl.tfms.fs[rm_tfms:]
        return test_tl
    else: raise Exception(f"This method requires using the fastai library to assemble your data. Expected a `Datasets` or a `TfmdLists` but got {dsets.__class__.__name__}")
class _Tfm1(Transform):
    "Test transform that triples its input; `split_idx=0` restricts it to subset 0"
    split_idx=0
    def encodes(self, x): return x*3

dsets = Datasets(range(8), [[_Tfm(),_Tfm1()]], splits=[[1,2,5,7],[0,3,4,6]])
test_eq(dsets.train, [(3,),(6,),(15,),(21,)])
test_eq(dsets.valid, [(0,),(6,),(8,),(12,)])

# Transforms of the validation set are applied
tst = test_set(dsets, [1,2,3])
test_eq(tst, [(2,),(4,),(6,)])

# Test with different types
tfm = _Tfm1()
tfm.split_idx,tfm.order = None,2
dsets = Datasets(['dog', 'cat', 'cat', 'dog'], [[_Cat(),tfm]])

# With strings
test_eq(test_set(dsets, ['dog', 'cat', 'cat']), [(3,), (0,), (0,)])
# With ints
test_eq(test_set(dsets, [1,2]), [(3,), (6,)])

# Test with various input lengths
dsets = Datasets(range(8), [[_Tfm(),_Tfm1()],[_Tfm(),_Tfm1()],[_Tfm(),_Tfm1()]], splits=[[1,2,5,7],[0,3,4,6]])
tst = test_set(dsets, [1,2,3])
test_eq(tst, [(2,2),(4,4),(6,6)])

dsets = Datasets(range(8), [[_Tfm(),_Tfm1()],[_Tfm(),_Tfm1()],[_Tfm(),_Tfm1()]], splits=[[1,2,5,7],[0,3,4,6]], n_inp=1)
tst = test_set(dsets, [1,2,3])
test_eq(tst, [(2,),(4,),(6,)])

# Test with rm_tfms
dsets = Datasets(range(8), [[_Tfm(),_Tfm()]], splits=[[1,2,5,7],[0,3,4,6]])
tst = test_set(dsets, [1,2,3])
test_eq(tst, [(4,),(8,),(12,)])

dsets = Datasets(range(8), [[_Tfm(),_Tfm()]], splits=[[1,2,5,7],[0,3,4,6]])
tst = test_set(dsets, [1,2,3], rm_tfms=1)
test_eq(tst, [(2,),(4,),(6,)])

dsets = Datasets(range(8), [[_Tfm(),_Tfm()], [_Tfm(),_Tfm()]], splits=[[1,2,5,7],[0,3,4,6]], n_inp=2)
tst = test_set(dsets, [1,2,3], rm_tfms=(1,0))
test_eq(tst, [(2,4),(4,8),(6,12)])
@patch
@delegates(TfmdDL.__init__)
def test_dl(self:DataLoaders,
    test_items, # Items in the test dataset
    rm_type_tfms=None, # Start index of the `Transform`(s) from the validation set in `dsets` to apply
    with_labels:bool=False, # Whether the test items contain labels
    **kwargs
):
    "Create a test dataloader from `test_items` using validation transforms of `dls`"
    # Only fastai containers go through `test_set`; any other dataset type passes the items through unchanged
    test_ds = test_set(self.valid_ds, test_items, rm_tfms=rm_type_tfms, with_labels=with_labels
                      ) if isinstance(self.valid_ds, (Datasets, TfmdLists)) else test_items
    # The new loader is derived from the validation loader; `kwargs` are forwarded to `new`
    return self.valid.new(test_ds, **kwargs)
dsets = Datasets(range(8), [[_Tfm(),_Tfm1()]], splits=[[1,2,5,7],[0,3,4,6]])
dls = dsets.dataloaders(bs=4, device=torch.device('cpu'))
dsets = Datasets(range(8), [[_Tfm(),_Tfm1()]], splits=[[1,2,5,7],[0,3,4,6]])
dls = dsets.dataloaders(bs=4, device=torch.device('cpu'))
# The test loader applies the validation transforms (inputs are doubled)
tst_dl = dls.test_dl([2,3,4,5])
test_eq(tst_dl._n_inp, 1)
test_eq(list(tst_dl), [(tensor([ 4, 6, 8, 10]),)])
# Test that you can change the transforms
tst_dl = dls.test_dl([2,3,4,5], after_item=add1)
test_eq(list(tst_dl), [(tensor([ 5, 7, 9, 11]),)])
导出 -
# Run nbdev's export step for this project.
from nbdev import nbdev_export
nbdev_export()
Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 01a_losses.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 10b_tutorial.albumentations.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_callback.core.ipynb.
Converted 13a_learner.ipynb.
Converted 13b_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 18a_callback.training.ipynb.
Converted 18b_callback.preds.ipynb.
Converted 19_callback.mixup.ipynb.
Converted 20_interpret.ipynb.
Converted 20a_distributed.ipynb.
Converted 21_vision.learner.ipynb.
Converted 22_tutorial.imagenette.ipynb.
Converted 23_tutorial.vision.ipynb.
Converted 24_tutorial.image_sequence.ipynb.
Converted 24_tutorial.siamese.ipynb.
Converted 24_vision.gan.ipynb.
Converted 30_text.core.ipynb.
Converted 31_text.data.ipynb.
Converted 32_text.models.awdlstm.ipynb.
Converted 33_text.models.core.ipynb.
Converted 34_callback.rnn.ipynb.
Converted 35_tutorial.wikitext.ipynb.
Converted 37_text.learner.ipynb.
Converted 38_tutorial.text.ipynb.
Converted 39_tutorial.transformers.ipynb.
Converted 40_tabular.core.ipynb.
Converted 41_tabular.data.ipynb.
Converted 42_tabular.model.ipynb.
Converted 43_tabular.learner.ipynb.
Converted 44_tutorial.tabular.ipynb.
Converted 45_collab.ipynb.
Converted 46_tutorial.collab.ipynb.
Converted 50_tutorial.datablock.ipynb.
Converted 60_medical.imaging.ipynb.
Converted 61_tutorial.medical_imaging.ipynb.
Converted 65_medical.text.ipynb.
Converted 70_callback.wandb.ipynb.
Converted 71_callback.tensorboard.ipynb.
Converted 72_callback.neptune.ipynb.
Converted 73_callback.captum.ipynb.
Converted 74_callback.azureml.ipynb.
Converted 97_test_utils.ipynb.
Converted 99_pytorch_doc.ipynb.
Converted dev-setup.ipynb.
Converted app_examples.ipynb.
Converted camvid.ipynb.
Converted migrating_catalyst.ipynb.
Converted migrating_ignite.ipynb.
Converted migrating_lightning.ipynb.
Converted migrating_pytorch.ipynb.
Converted ulmfit.ipynb.
Converted index.ipynb.
Converted quick_start.ipynb.
Converted tutorial.ipynb.