DataFrame.hvplot fails with MultiIndex columns
ALL software version info (hvPlot, holoviews, bokeh, python, notebook, OS, browser, any other relevant packages)
Selected packages from conda env export:
- bokeh=1.3.4=py37_0
- holoviews=1.12.6=py_0
- hvplot=0.5.2=py_0
- ipykernel=5.1.0=py37h39e3cac_0
- ipython=7.5.0=py37h39e3cac_0
- ipython_genutils=0.2.0=py37_0
- ipywidgets=7.4.2=py37_0
- jupyter_client=5.2.4=py37_0
- jupyter_core=4.4.0=py37_0
- jupyterlab=0.35.5=py37hf63ae98_0
- jupyterlab_server=0.2.0=py37_0
- matplotlib=3.0.3=py37h5429711_0
- numpy=1.16.3=py37h7e9f1db_0
- pandas=0.23.4=py37h04863e7_0
- python=3.7.3=h0371630_0
Description of expected behavior and the observed behavior
Calling DataFrame.hvplot with a DataFrame that has MultiIndex columns should work, similar to how DataFrame.plot does. Instead it raises a TypeError.
Complete, minimal, self-contained example code that reproduces the issue
import pandas as pd
import hvplot.pandas
df = pd.DataFrame([[1, 2], [3, 4]], columns=pd.MultiIndex.from_product([['a'], ['b', 'c']]))
df.plot() # works
df.hvplot() # raises TypeError
The traceback is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-12-b455dcbf200c> in <module>
----> 1 df.hvplot()
/var/local/conda/envs/devtools/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds)
70 return pn.panel(plot, **panel_dict)
71
---> 72 return self._get_converter(x, y, kind, **kwds)(kind, x, y)
73
74 def _get_converter(self, x=None, y=None, kind=None, **kwds):
/var/local/conda/envs/devtools/lib/python3.7/site-packages/hvplot/converter.py in __call__(self, kind, x, y)
942 obj = DynamicMap(cbcallable, streams=[self.stream])
943 else:
--> 944 obj = method(x, y)
945
946 if self.crs and self.project:
/var/local/conda/envs/devtools/lib/python3.7/site-packages/hvplot/converter.py in line(self, x, y, data)
1228
1229 def line(self, x=None, y=None, data=None):
-> 1230 return self.chart(Curve, x, y, data)
1231
1232 def step(self, x=None, y=None, data=None):
/var/local/conda/envs/devtools/lib/python3.7/site-packages/hvplot/converter.py in chart(self, element, x, y, data)
1223 for c in y:
1224 kdims, vdims = self._get_dimensions([x], [c])
-> 1225 chart = element(data, kdims, vdims).redim(**{c: self.value_label})
1226 charts.append((c, chart.relabel(**self._relabel)))
1227 return self._by_type(charts, self.group_label, sort=False).opts(opts)
TypeError: __call__() keywords must be strings
Same issue here.
Workaround for your minimal example:
df.stack().reset_index().hvplot(x='level_0', by='level_1')
But this might be too bodgy for a more complex real-world application.
Yeah, we should patch this asap. Hopefully we can get a fix in a 0.5.3 release soon.
Similar issue with hvplot.xarray. Note there it fails eventually with a NotImplementedError in pandas. It's a bit unfortunate it also fails even when not using that index.
Related issue on pandas tracker https://github.com/pandas-dev/pandas/issues/34019.
<details>
<summary>Full traceback</summary>
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds) 70 return pn.panel(plot, **panel_dict) 71 ---> 72 return self._get_converter(x, y, kind, **kwds)(kind, x, y) 73 74 def _get_converter(self, x=None, y=None, kind=None, **kwds):
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/hvplot/plotting/core.py in _get_converter(self, x, y, kind, **kwds) 78 kind = kind or params.pop('kind', None) 79 return HoloViewsConverter( ---> 80 self._data, x, y, kind=kind, **params 81 ) 82
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/hvplot/converter.py in __init__(self, data, x, y, kind, by, use_index, group_label, value_label, backlog, persist, use_dask, crs, fields, groupby, dynamic, grid, legend, rot, title, xlim, ylim, clim, symmetric, logx, logy, loglog, hover, subplots, label, invert, stacked, colorbar, datashade, rasterize, row, col, figsize, debug, framewise, aggregator, projection, global_extent, geo, precompute, flip_xaxis, flip_yaxis, dynspread, hover_cols, x_sampling, y_sampling, project, tools, attr_labels, coastline, tiles, sort_date, check_symmetric_max, **kwds) 322 self._process_data(kind, data, x, y, by, groupby, row, col, 323 use_dask, persist, backlog, label, value_label, --> 324 hover_cols, attr_labels, kwds) 325 326 self.dynamic = dynamic
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/hvplot/converter.py in _process_data(self, kind, data, x, y, by, groupby, row, col, use_dask, persist, backlog, label, value_label, hover_cols, attr_labels, kwds) 647 data, x, y, by_new, groupby_new = process_xarray( 648 data, x, y, by, groupby, use_dask, persist, gridded, --> 649 label, value_label, other_dims, kind=kind) 650 651 if kind not in self._stats_types:
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/hvplot/util.py in process_xarray(data, x, y, by, groupby, use_dask, persist, gridded, label, value_label, other_dims, kind) 340 data = data.persist() if persist else data 341 else: --> 342 data = dataset.to_dataframe() 343 if len(data.index.names) > 1: 344 data = data.reset_index()
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/xarray/core/dataset.py in to_dataframe(self) 4503 this dataset's indices. 4504 """ -> 4505 return self._to_dataframe(self.dims) 4506 4507 def _set_sparse_data_from_dataframe(
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/xarray/core/dataset.py in _to_dataframe(self, ordered_dims) 4493 for k in columns 4494 ] -> 4495 index = self.coords.to_index(ordered_dims) 4496 return pd.DataFrame(dict(zip(columns, data)), index=index) 4497
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/xarray/core/coordinates.py in to_index(self, ordered_dims) 109 indexes = [self._data.get_index(k) for k in ordered_dims] # type: ignore 110 names = list(ordered_dims) --> 111 return pd.MultiIndex.from_product(indexes, names=names) 112 113 def update(self, other: Mapping[Hashable, Any]) -> None:
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/indexes/multi.py in from_product(cls, iterables, sortorder, names) 546 iterables = list(iterables) 547 --> 548 codes, levels = factorize_from_iterables(iterables) 549 if names is lib.no_default: 550 names = [getattr(it, "name", None) for it in iterables]
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/arrays/categorical.py in factorize_from_iterables(iterables) 2706 # For consistency, it should return a list of 2 lists. 2707 return [[], []] -> 2708 return map(list, zip(*(factorize_from_iterable(it) for it in iterables)))
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/arrays/categorical.py in `<genexpr>(.0)`
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/arrays/categorical.py in factorize_from_iterable(values) 2678 # but only the resulting categories, the order of which is independent 2679 # from ordered. Set ordered to False as default. See GH #15457 -> 2680 cat = Categorical(values, ordered=False) 2681 categories = cat.categories 2682 codes = cat.codes
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath) 372 373 # we're inferring from values --> 374 dtype = CategoricalDtype(categories, dtype.ordered) 375 376 elif is_categorical_dtype(values):
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/dtypes/dtypes.py in __init__(self, categories, ordered) 220 221 def __init__(self, categories=None, ordered: Ordered = False): --> 222 self._finalize(categories, ordered, fastpath=False) 223 224 @classmethod
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/dtypes/dtypes.py in _finalize(self, categories, ordered, fastpath) 367 368 if categories is not None: --> 369 categories = self.validate_categories(categories, fastpath=fastpath) 370 371 self._categories = categories
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/dtypes/dtypes.py in validate_categories(categories, fastpath) 540 if not fastpath: 541 --> 542 if categories.hasnans: 543 raise ValueError("Categorial categories cannot be null") 544
pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.__get__()
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/indexes/base.py in hasnans(self) 1779 """ 1780 if self._can_hold_na: -> 1781 return bool(self._isnan.any()) 1782 else: 1783 return False
pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.__get__()
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/indexes/base.py in _isnan(self) 1759 """ 1760 if self._can_hold_na: -> 1761 return isna(self) 1762 else: 1763 # shouldn't reach to this condition by checking hasnans beforehand
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/dtypes/missing.py in isna(obj) 124 Name: 1, dtype: bool 125 """ --> 126 return _isna(obj) 127 128
/nix/store/1fkb99gaslnhjfwg2j8gv47v8q790bl4-python3-3.7.7-env/lib/python3.7/site-packages/pandas/core/dtypes/missing.py in _isna_new(obj) 136 # hack (for now) because MI registers as ndarray 137 elif isinstance(obj, ABCMultiIndex): --> 138 raise NotImplementedError("isna is not defined for MultiIndex") 139 elif isinstance(obj, type): 140 return False
NotImplementedError: isna is not defined for MultiIndex
</details>
It would definitely be nice to have this working.
I can still reproduce.
> Similar issue with `hvplot.xarray`. Note there it fails eventually with a `NotImplementedError` in `pandas`. It's a bit unfortunate it also fails even when not using that index.
>
> Related issue on pandas tracker pandas-dev/pandas#34019.
I just ran into this same bug with hvplot.xarray, although it gave me a ValueError.