hvplot icon indicating copy to clipboard operation
hvplot copied to clipboard

Support for Pandas DataFrames with hierarchical columns broken?

Open maximlt opened this issue 1 year ago • 0 comments

There's code in the converter to support hierarchical columns in a Pandas DataFrame; however, it's untested, and I think support for this kind of structure broke at some point.

https://github.com/holoviz/hvplot/blob/6c96c7e9abcd44380d2122e3d86827dedab32dea/hvplot/converter.py#L1221-L1229

Trying to plot a DataFrame with this structure raises a DataError:

import pandas as pd, numpy as np
import hvplot.pandas

arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples)
df = pd.DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)

df.hvplot.scatter()
image

Traceback:

[/Users/mliquet/dev/hvplot/hvplot/converter.py:1225](http://localhost:8888/Users/mliquet/dev/hvplot/hvplot/converter.py#line=1224): FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.
  self.data = data.stack().reset_index(1).rename(columns={'level_1': group_label})

---------------------------------------------------------------------------
DataError                                 Traceback (most recent call last)
File [~/dev/hvplot/hvplot/converter.py:1666](http://localhost:8888/lab/tree/~/dev/hvplot/hvplot/converter.py#line=1665), in HoloViewsConverter.__call__(self, kind, x, y)
   1665 try:
-> 1666     dataset = Dataset(data, self.indexes)
   1667 except Exception:

File [~/dev/holoviews/holoviews/core/data/__init__.py:329](http://localhost:8888/lab/tree/~/dev/holoviews/holoviews/core/data/__init__.py#line=328), in Dataset.__init__(self, data, kdims, vdims, **kwargs)
    328 validate_vdims = kwargs.pop('_validate_vdims', True)
--> 329 initialized = Interface.initialize(type(self), data, kdims, vdims,
    330                                    datatype=kwargs.get('datatype'))
    331 (data, self.interface, dims, extra_kws) = initialized

File [~/dev/holoviews/holoviews/core/data/interface.py:253](http://localhost:8888/lab/tree/~/dev/holoviews/holoviews/core/data/interface.py#line=252), in Interface.initialize(cls, eltype, data, kdims, vdims, datatype)
    252 try:
--> 253     (data, dims, extra_kws) = interface.init(eltype, data, kdims, vdims)
    254     break

File [~/dev/holoviews/holoviews/core/data/pandas.py:69](http://localhost:8888/lab/tree/~/dev/holoviews/holoviews/core/data/pandas.py#line=68), in PandasInterface.init(cls, eltype, data, kdims, vdims)
     68 if any(not isinstance(d, (str, Dimension)) for d in kdims+vdims):
---> 69     raise DataError(
     70         "Having a non-string as a column name in a DataFrame is not supported."
     71     )
     73 if kdims and not (len(kdims) == len(index_names) and {dimension_name(kd) for kd in kdims} == set(index_names)):

DataError: Having a non-string as a column name in a DataFrame is not supported.

During handling of the above exception, another exception occurred:

DataError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 df.hvplot.scatter()

File [~/dev/hvplot/hvplot/plotting/core.py:576](http://localhost:8888/lab/tree/~/dev/hvplot/hvplot/plotting/core.py#line=575), in hvPlotTabular.scatter(self, x, y, **kwds)
    471 def scatter(self, x=None, y=None, **kwds):
    472     """
    473     The `scatter` plot visualizes your points as markers in 2D space. You can visualize
    474     one more dimension by using colors.
   (...)
    574     - Wiki: https://en.wikipedia.org/wiki/Scatter_plot
    575     """
--> 576     return self(x, y, kind='scatter', **kwds)

File ~/dev/hvplot/hvplot/plotting/core.py:95, in hvPlotBase.__call__(self, x, y, kind, **kwds)
     92         plot = self._get_converter(x, y, kind, **kwds)(kind, x, y)
     93         return pn.panel(plot, **panel_dict)
---> 95 return self._get_converter(x, y, kind, **kwds)(kind, x, y)

File [~/dev/hvplot/hvplot/converter.py:1668](http://localhost:8888/lab/tree/~/dev/hvplot/hvplot/converter.py#line=1667), in HoloViewsConverter.__call__(self, kind, x, y)
   1666         dataset = Dataset(data, self.indexes)
   1667     except Exception:
-> 1668         dataset = Dataset(data)
   1669     dataset = dataset.redim(**self._redim)
   1671 obj = method(x, y)

File [~/dev/holoviews/holoviews/core/data/__init__.py:329](http://localhost:8888/lab/tree/~/dev/holoviews/holoviews/core/data/__init__.py#line=328), in Dataset.__init__(self, data, kdims, vdims, **kwargs)
    326 kdims, vdims = kwargs.get('kdims'), kwargs.get('vdims')
    328 validate_vdims = kwargs.pop('_validate_vdims', True)
--> 329 initialized = Interface.initialize(type(self), data, kdims, vdims,
    330                                    datatype=kwargs.get('datatype'))
    331 (data, self.interface, dims, extra_kws) = initialized
    332 super().__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))

File [~/dev/holoviews/holoviews/core/data/interface.py:253](http://localhost:8888/lab/tree/~/dev/holoviews/holoviews/core/data/interface.py#line=252), in Interface.initialize(cls, eltype, data, kdims, vdims, datatype)
    251     continue
    252 try:
--> 253     (data, dims, extra_kws) = interface.init(eltype, data, kdims, vdims)
    254     break
    255 except DataError:

File [~/dev/holoviews/holoviews/core/data/pandas.py:69](http://localhost:8888/lab/tree/~/dev/holoviews/holoviews/core/data/pandas.py#line=68), in PandasInterface.init(cls, eltype, data, kdims, vdims)
     66     vdims = list(data.columns[:nvdim if nvdim else None])
     68 if any(not isinstance(d, (str, Dimension)) for d in kdims+vdims):
---> 69     raise DataError(
     70         "Having a non-string as a column name in a DataFrame is not supported."
     71     )
     73 if kdims and not (len(kdims) == len(index_names) and {dimension_name(kd) for kd in kdims} == set(index_names)):
     74     kdim = dimension_name(kdims[0])

DataError: Having a non-string as a column name in a DataFrame is not supported.

maximlt avatar Jul 02 '24 13:07 maximlt