fastai icon indicating copy to clipboard operation
fastai copied to clipboard

DataBlock gives cryptic error when used with dataframe and no training set

Open cipri-tom opened this issue 3 years ago • 0 comments

Please confirm you have the latest versions of fastai, fastcore, and nbdev prior to reporting a bug (delete one): NO

fastai v2.4, but I believe the bug is still there. See below

Describe the bug: An IndexError is raised when using a DataBlock with a DataFrame in which all items are in the validation set (i.e. the training set is empty).

To Reproduce

# Minimal reproduction: a 10-row DataFrame in which every row is flagged
# as belonging to the validation set, so no rows remain for training.
data = pd.DataFrame({
    # File names '0.png' .. '9.png'.
    'fname': [f'{x}.png' for x in range(10)], 
    # Alternating 0/1 labels.
    'label': np.arange(10)%2, 
    # Scalar value, so every row gets is_valid=True.
    'is_valid': True
})
# NOTE(review): ColSplitter() is called with no arguments; presumably it
# splits on the 'is_valid' column by default -- confirm against the fastai docs.
blk = DataBlock((ImageBlock, CategoryBlock),
                splitter=ColSplitter(),
                get_x=ColReader('fname'),
                get_y=ColReader('label'),
                item_tfms=Resize(224, method=ResizeMethod.Squish),
)

# This call raises the IndexError shown in the stack trace below.
blk.summary(data)

Expected behavior Have the examples constructed, or at least raise a sensible error.

Error with full stack trace

CLICK ME
IndexError                                Traceback (most recent call last)
<ipython-input-44-f6eb35c312e1> in <module>
----> 1 blk.summary(data)

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/block.py in summary(self, source, bs, show_batch, **kwargs)
    158     "Steps through the transform pipeline for one batch, and optionally calls `show_batch(**kwargs)` on the transient `Dataloaders`."
    159     print(f"Setting-up type transforms pipelines")
--> 160     dsets = self.datasets(source, verbose=True)
    161     print("\nBuilding one sample")
    162     for tl in dsets.train.tls:

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/block.py in datasets(self, source, verbose)
    108         splits = (self.splitter or RandomSplitter())(items)
    109         pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 110         return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
    111 
    112     def dataloaders(self, source, path='.', verbose=False, **kwargs):

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
    327     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    328         super().__init__(dl_type=dl_type)
--> 329         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    330         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    331 

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in <listcomp>(.0)
    327     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    328         super().__init__(dl_type=dl_type)
--> 329         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    330         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    331 

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     95     def __call__(cls, x=None, *args, **kwargs):
     96         if not args and not kwargs and x is not None and isinstance(x,cls): return x
---> 97         return super().__call__(x, *args, **kwargs)
     98 
     99 # Cell

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose, dl_type)
    253         if do_setup:
    254             pv(f"Setting up {self.tfms}", verbose)
--> 255             self.setup(train_setup=train_setup)
    256 
    257     def _new(self, items, split_idx=None, **kwargs):

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastai/data/core.py in setup(self, train_setup)
    271         self.tfms.setup(self, train_setup)
    272         if len(self) != 0:
--> 273             x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]
    274             self.types = []
    275             for f in self.tfms.fs:

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastcore/foundation.py in __getitem__(self, idx)
    109     def _xtra(self): return None
    110     def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
--> 111     def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
    112     def copy(self): return self._new(self.items.copy())
    113 

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/fastcore/foundation.py in _get(self, i)
    113 
    114     def _get(self, i):
--> 115         if is_indexer(i) or isinstance(i,slice): return getattr(self.items,'iloc',self.items)[i]
    116         i = mask2idxs(i)
    117         return (self.items.iloc[list(i)] if hasattr(self.items,'iloc')

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key)
    893 
    894             maybe_callable = com.apply_if_callable(key, self.obj)
--> 895             return self._getitem_axis(maybe_callable, axis=axis)
    896 
    897     def _is_scalar_access(self, key: Tuple):

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1499 
   1500             # validate the location
-> 1501             self._validate_integer(key, axis)
   1502 
   1503             return self.obj._ixs(key, axis=axis)

~/miniconda3/envs/wind-damage2/lib/python3.9/site-packages/pandas/core/indexing.py in _validate_integer(self, key, axis)
   1442         len_axis = len(self.obj._get_axis(axis))
   1443         if key >= len_axis or key < -len_axis:
-> 1444             raise IndexError("single positional indexer is out-of-bounds")
   1445 
   1446     # -------------------------------------------------------------------

IndexError: single positional indexer is out-of-bounds

Additional context

It comes from this line: https://github.com/fastai/fastai/blob/351f4b9314e2ea23684fb2e19235ee5c5ef8cbfd/fastai/data/core.py#L272

(sorry, I cannot link to the line in the notebook)

where self.splits[0] is empty, so the trailing [0] lookup on the (empty) selection asks for an item that does not exist.

cipri-tom avatar Dec 10 '21 15:12 cipri-tom