ydata-profiling icon indicating copy to clipboard operation
ydata-profiling copied to clipboard

Fails on simple Iris dataset

Open rasbt opened this issue 3 years ago • 0 comments

Current Behaviour

Using the simple Iris dataset as an example, the report generation fails with an error.

Expected Behaviour

It should generate the report, because the dataset format is similar to the example in the README.

Data Description

DataFrame with 4 numeric columns.

Code that reproduces the bug

import pandas as pd
from pandas_profiling import ProfileReport
from sklearn.datasets import load_iris

iris = load_iris()
X_iris = iris.data


X_df = pd.DataFrame(
    X_iris, columns=["sepal length", "sepal width", "petal width", "petal width"]
)

profile = ProfileReport(X_df, title="Report")
profile.to_file("your_report.html")

where X_df is

<img width="474" alt="Screen Shot 2022-09-13 at 1 11 56 PM" src="https://user-images.githubusercontent.com/5618407/189980358-69b1cb31-7a83-440c-a57c-2fc28dc46c4f.png">
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [5], in <module>
----> 1 profile.to_file("your_report.html")

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:277, in ProfileReport.to_file(self, output_file, silent)
    274         self.config.html.assets_prefix = str(output_file.stem) + "_assets"
    275     create_html_assets(self.config, output_file)
--> 277 data = self.to_html()
    279 if output_file.suffix != ".html":
    280     suffix = output_file.suffix

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:388, in ProfileReport.to_html(self)
    380 def to_html(self) -> str:
    381     """Generate and return complete template as lengthy string
    382         for using with frameworks.
    383 
   (...)
    386 
    387     """
--> 388     return self.html

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:205, in ProfileReport.html(self)
    202 @property
    203 def html(self) -> str:
    204     if self._html is None:
--> 205         self._html = self._render_html()
    206     return self._html

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:307, in ProfileReport._render_html(self)
    304 def _render_html(self) -> str:
    305     from pandas_profiling.report.presentation.flavours import HTMLReport
--> 307     report = self.report
    309     with tqdm(
    310         total=1, desc="Render HTML", disable=not self.config.progress_bar
    311     ) as pbar:
    312         html = HTMLReport(copy.deepcopy(report)).render(
    313             nav=self.config.html.navbar_show,
    314             offline=self.config.html.use_local_assets,
   (...)
    322             version=self.description_set["package"]["pandas_profiling_version"],
    323         )

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:199, in ProfileReport.report(self)
    196 @property
    197 def report(self) -> Root:
    198     if self._report is None:
--> 199         self._report = get_report_structure(self.config, self.description_set)
    200     return self._report

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:181, in ProfileReport.description_set(self)
    178 @property
    179 def description_set(self) -> Dict[str, Any]:
    180     if self._description_set is None:
--> 181         self._description_set = describe_df(
    182             self.config,
    183             self.df,
    184             self.summarizer,
    185             self.typeset,
    186             self._sample,
    187         )
    188     return self._description_set

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/model/describe.py:115, in describe(config, df, summarizer, typeset, sample)
    111 scatter_matrix: Dict[Any, Dict[Any, Any]] = {
    112     x: {y: None} for x, y in scatter_tasks
    113 }
    114 for x, y in scatter_tasks:
--> 115     scatter_matrix[x][y] = progress(
    116         get_scatter_plot, pbar, f"scatter {x}, {y}"
    117     )(config, df, x, y, interval_columns)
    119 # Table statistics
    120 table_stats = progress(get_table_stats, pbar, "Get dataframe statistics")(
    121     config, df, series_description
    122 )

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/utils/progress_bar.py:11, in progress.<locals>.inner(*args, **kwargs)
      8 @wraps(fn)
      9 def inner(*args, **kwargs) -> Any:
     10     bar.set_postfix_str(message)
---> 11     ret = fn(*args, **kwargs)
     12     bar.update()
     13     return ret

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/model/pairwise.py:31, in get_scatter_plot(config, df, x, y, continuous_variables)
     29     else:
     30         df_temp = df[[x, y]].dropna()
---> 31     return scatter_pairwise(config, df_temp[x], df_temp[y], x, y)
     32 else:
     33     return ""

File ~/miniforge3/lib/python3.9/contextlib.py:79, in ContextDecorator.__call__.<locals>.inner(*args, **kwds)
     76 @wraps(func)
     77 def inner(*args, **kwds):
     78     with self._recreate_cm():
---> 79         return func(*args, **kwds)

File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/visualisation/plot.py:294, in scatter_pairwise(config, series1, series2, x_label, y_label)
    290 plt.ylabel(y_label)
    292 color = config.html.style.primary_color
--> 294 indices = (series1.notna()) & (series2.notna())
    295 if len(series1) > config.plot.scatter_threshold:
    296     cmap = sns.light_palette(color, as_cmap=True)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/ops/common.py:70, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
     66             return NotImplemented
     68 other = item_from_zerodim(other)
---> 70 return method(self, other)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/arraylike.py:70, in OpsMixin.__and__(self, other)
     68 @unpack_zerodim_and_defer("__and__")
     69 def __and__(self, other):
---> 70     return self._logical_method(other, operator.and_)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/frame.py:6946, in DataFrame._arith_method(self, other, op)
   6943 axis = 1  # only relevant for Series other case
   6944 other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],))
-> 6946 self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None)
   6948 new_data = self._dispatch_frame_op(other, op, axis=axis)
   6949 return self._construct_result(new_data)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/ops/__init__.py:307, in align_method_FRAME(left, right, axis, flex, level)
    297         if not left.axes[axis].equals(right.index):
    298             warnings.warn(
    299                 "Automatic reindexing on DataFrame vs Series comparisons "
    300                 "is deprecated and will raise ValueError in a future version. "
   (...)
    304                 stacklevel=find_stack_level(),
    305             )
--> 307     left, right = left.align(
    308         right, join="outer", axis=axis, level=level, copy=False
    309     )
    310     right = _maybe_align_series_as_frame(left, right, axis)
    312 return left, right

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/frame.py:4711, in DataFrame.align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
   4697 @doc(NDFrame.align, **_shared_doc_kwargs)
   4698 def align(
   4699     self,
   (...)
   4709     broadcast_axis: Axis | None = None,
   4710 ) -> DataFrame:
-> 4711     return super().align(
   4712         other,
   4713         join=join,
   4714         axis=axis,
   4715         level=level,
   4716         copy=copy,
   4717         fill_value=fill_value,
   4718         method=method,
   4719         limit=limit,
   4720         fill_axis=fill_axis,
   4721         broadcast_axis=broadcast_axis,
   4722     )

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/generic.py:8878, in NDFrame.align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
   8866     return self._align_frame(
   8867         other,
   8868         join=join,
   (...)
   8875         fill_axis=fill_axis,
   8876     )
   8877 elif isinstance(other, ABCSeries):
-> 8878     return self._align_series(
   8879         other,
   8880         join=join,
   8881         axis=axis,
   8882         level=level,
   8883         copy=copy,
   8884         fill_value=fill_value,
   8885         method=method,
   8886         limit=limit,
   8887         fill_axis=fill_axis,
   8888     )
   8889 else:  # pragma: no cover
   8890     raise TypeError(f"unsupported type: {type(other)}")

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/generic.py:8995, in NDFrame._align_series(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
   8993 lidx, ridx = None, None
   8994 if not join_index.equals(other.index):
-> 8995     join_index, lidx, ridx = join_index.join(
   8996         other.index, how=join, level=level, return_indexers=True
   8997     )
   8999 if lidx is not None:
   9000     bm_axis = self._get_block_manager_axis(axis)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:216, in _maybe_return_indexers.<locals>.join(self, other, how, level, return_indexers, sort)
    207 @functools.wraps(meth)
    208 def join(
    209     self,
   (...)
    214     sort: bool = False,
    215 ):
--> 216     join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
    217     if not return_indexers:
    218         return join_index

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:4396, in Index.join(self, other, how, level, return_indexers, sort)
   4394     this = self.astype(dtype, copy=False)
   4395     other = other.astype(dtype, copy=False)
-> 4396     return this.join(other, how=how, return_indexers=True)
   4398 _validate_join_method(how)
   4400 if not self.is_unique and not other.is_unique:

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:216, in _maybe_return_indexers.<locals>.join(self, other, how, level, return_indexers, sort)
    207 @functools.wraps(meth)
    208 def join(
    209     self,
   (...)
    214     sort: bool = False,
    215 ):
--> 216     join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
    217     if not return_indexers:
    218         return join_index

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:4406, in Index.join(self, other, how, level, return_indexers, sort)
   4403 if self.is_monotonic and other.is_monotonic:
   4404     if self._can_use_libjoin:
   4405         # otherwise we will fall through to _join_via_get_indexer
-> 4406         return self._join_monotonic(other, how=how)
   4407 else:
   4408     return self._join_non_unique(other, how=how)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:4754, in Index._join_monotonic(self, other, how)
   4752         join_array, lidx, ridx = self._inner_indexer(other)
   4753     elif how == "outer":
-> 4754         join_array, lidx, ridx = self._outer_indexer(other)
   4756     join_index = self._wrap_joined_index(join_array, other)
   4758 lidx = None if lidx is None else ensure_platform_int(lidx)

File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:358, in Index._outer_indexer(self, other)
    356 sv = self._get_engine_target()
    357 ov = other._get_engine_target()
--> 358 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
    359 joined = self._from_join_target(joined_ndarray)
    360 return joined, lidx, ridx

File ~/miniforge3/lib/python3.9/site-packages/pandas/_libs/join.pyx:575, in pandas._libs.join.outer_join_indexer()

TypeError: '<' not supported between instances of 'str' and 'int'

pandas-profiling version

3.3.0

OS

macOS

Checklist

  • [X] There is not yet another bug report for this issue in the issue tracker
  • [X] The problem is reproducible from this bug report. This guide can help to craft a minimal bug report.
  • [X] The issue has not been resolved by the entries listed under Common Issues.

rasbt avatar Sep 13 '22 18:09 rasbt