fastai
fastai copied to clipboard
DataBlock gives cryptic error when used with dataframe and no training set
Please confirm you have the latest versions of fastai, fastcore, and nbdev prior to reporting a bug (delete one): NO
I am on fastai v2.4, but I believe the bug is still present in the current code; see "Additional context" below.
Describe the bug
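`DataBlock.summary` (via `DataBlock.datasets`) raises `IndexError: single positional indexer is out-of-bounds`
when the `DataBlock` is used with a DataFrame in which every item is assigned to the validation set, i.e. the training split is empty.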
To Reproduce
data = pd.DataFrame({
'fname': [f'{x}.png' for x in range(10)],
'label': np.arange(10)%2,
'is_valid': True
})
blk = DataBlock((ImageBlock, CategoryBlock),
splitter=ColSplitter(),
get_x=ColReader('fname'),
get_y=ColReader('label'),
item_tfms=Resize(224, method=ResizeMethod.Squish),
)
blk.summary(data)
Expected behavior: the examples should be constructed, or at least a sensible error should be raised explaining that the training set is empty.
Error with full stack trace
CLICK ME
IndexError Traceback (most recent call last)
<ipython-input-44-f6eb35c312e1> in <module>
----> 1 blk.summary(data)
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/block.py in summary(self, source, bs, show_batch, **kwargs)
158 "Steps through the transform pipeline for one batch, and optionally calls `show_batch(**kwargs)` on the transient `Dataloaders`."
159 print(f"Setting-up type transforms pipelines")
--> 160 dsets = self.datasets(source, verbose=True)
161 print("\nBuilding one sample")
162 for tl in dsets.train.tls:
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/block.py in datasets(self, source, verbose)
108 splits = (self.splitter or RandomSplitter())(items)
109 pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 110 return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
111
112 def dataloaders(self, source, path='.', verbose=False, **kwargs):
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
327 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
328 super().__init__(dl_type=dl_type)
--> 329 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
330 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
331
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in <listcomp>(.0)
327 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
328 super().__init__(dl_type=dl_type)
--> 329 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
330 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
331
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
95 def __call__(cls, x=None, *args, **kwargs):
96 if not args and not kwargs and x is not None and isinstance(x,cls): return x
---> 97 return super().__call__(x, *args, **kwargs)
98
99 # Cell
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose, dl_type)
253 if do_setup:
254 pv(f"Setting up {self.tfms}", verbose)
--> 255 self.setup(train_setup=train_setup)
256
257 def _new(self, items, split_idx=None, **kwargs):
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in setup(self, train_setup)
271 self.tfms.setup(self, train_setup)
272 if len(self) != 0:
--> 273 x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]
274 self.types = []
275 for f in self.tfms.fs:
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastcore/foundation.py in __getitem__(self, idx)
109 def _xtra(self): return None
110 def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
--> 111 def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
112 def copy(self): return self._new(self.items.copy())
113
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastcore/foundation.py in _get(self, i)
113
114 def _get(self, i):
--> 115 if is_indexer(i) or isinstance(i,slice): return getattr(self.items,'iloc',self.items)[i]
116 i = mask2idxs(i)
117 return (self.items.iloc[list(i)] if hasattr(self.items,'iloc')
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key)
893
894 maybe_callable = com.apply_if_callable(key, self.obj)
--> 895 return self._getitem_axis(maybe_callable, axis=axis)
896
897 def _is_scalar_access(self, key: Tuple):
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1499
1500 # validate the location
-> 1501 self._validate_integer(key, axis)
1502
1503 return self.obj._ixs(key, axis=axis)
~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/pandas/core/indexing.py in _validate_integer(self, key, axis)
1442 len_axis = len(self.obj._get_axis(axis))
1443 if key >= len_axis or key < -len_axis:
-> 1444 raise IndexError("single positional indexer is out-of-bounds")
1445
1446 # -------------------------------------------------------------------
IndexError: single positional indexer is out-of-bounds
Additional context
It comes from this line: https://github.com/fastai/fastai/blob/351f4b9314e2ea23684fb2e19235ee5c5ef8cbfd/fastai/data/core.py#L272
(sorry, I cannot link to the corresponding line in the notebook)
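where `self.splits[0]` (the training split) is empty: `super().__getitem__(self.splits[0])` therefore returns an empty selection,
and taking element `[0]` of that empty selection
fails, because no such element exists.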