ydata-profiling
ydata-profiling copied to clipboard
Fails on simple Iris dataset
Current Behaviour
Using the simple Iris dataset as an example, the report generation fails with an error.
Expected Behaviour
The report should be generated successfully, because the dataset's format is similar to that of the example in the README.
Data Description
DataFrame with 4 numeric columns.
Code that reproduces the bug
import pandas as pd
from pandas_profiling import ProfileReport
from sklearn.datasets import load_iris
iris = load_iris()
X_iris = iris.data
X_df = pd.DataFrame(
X_iris, columns=["sepal length", "sepal width", "petal width", "petal width"]
)
profile = ProfileReport(X_df, title="Report")
profile.to_file("your_report.html")
where X_df is
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [5], in <module>
----> 1 profile.to_file("your_report.html")
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:277, in ProfileReport.to_file(self, output_file, silent)
274 self.config.html.assets_prefix = str(output_file.stem) + "_assets"
275 create_html_assets(self.config, output_file)
<img width="474" alt="Screen Shot 2022-09-13 at 1 11 56 PM" src="https://user-images.githubusercontent.com/5618407/189980358-69b1cb31-7a83-440c-a57c-2fc28dc46c4f.png">
--> 277 data = self.to_html()
279 if output_file.suffix != ".html":
280 suffix = output_file.suffix
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:388, in ProfileReport.to_html(self)
380 def to_html(self) -> str:
381 """Generate and return complete template as lengthy string
382 for using with frameworks.
383
(...)
386
387 """
--> 388 return self.html
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:205, in ProfileReport.html(self)
202 @property
203 def html(self) -> str:
204 if self._html is None:
--> 205 self._html = self._render_html()
206 return self._html
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:307, in ProfileReport._render_html(self)
304 def _render_html(self) -> str:
305 from pandas_profiling.report.presentation.flavours import HTMLReport
--> 307 report = self.report
309 with tqdm(
310 total=1, desc="Render HTML", disable=not self.config.progress_bar
311 ) as pbar:
312 html = HTMLReport(copy.deepcopy(report)).render(
313 nav=self.config.html.navbar_show,
314 offline=self.config.html.use_local_assets,
(...)
322 version=self.description_set["package"]["pandas_profiling_version"],
323 )
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:199, in ProfileReport.report(self)
196 @property
197 def report(self) -> Root:
198 if self._report is None:
--> 199 self._report = get_report_structure(self.config, self.description_set)
200 return self._report
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/profile_report.py:181, in ProfileReport.description_set(self)
178 @property
179 def description_set(self) -> Dict[str, Any]:
180 if self._description_set is None:
--> 181 self._description_set = describe_df(
182 self.config,
183 self.df,
184 self.summarizer,
185 self.typeset,
186 self._sample,
187 )
188 return self._description_set
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/model/describe.py:115, in describe(config, df, summarizer, typeset, sample)
111 scatter_matrix: Dict[Any, Dict[Any, Any]] = {
112 x: {y: None} for x, y in scatter_tasks
113 }
114 for x, y in scatter_tasks:
--> 115 scatter_matrix[x][y] = progress(
116 get_scatter_plot, pbar, f"scatter {x}, {y}"
117 )(config, df, x, y, interval_columns)
119 # Table statistics
120 table_stats = progress(get_table_stats, pbar, "Get dataframe statistics")(
121 config, df, series_description
122 )
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/utils/progress_bar.py:11, in progress.<locals>.inner(*args, **kwargs)
8 @wraps(fn)
9 def inner(*args, **kwargs) -> Any:
10 bar.set_postfix_str(message)
---> 11 ret = fn(*args, **kwargs)
12 bar.update()
13 return ret
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/model/pairwise.py:31, in get_scatter_plot(config, df, x, y, continuous_variables)
29 else:
30 df_temp = df[[x, y]].dropna()
---> 31 return scatter_pairwise(config, df_temp[x], df_temp[y], x, y)
32 else:
33 return ""
File ~/miniforge3/lib/python3.9/contextlib.py:79, in ContextDecorator.__call__.<locals>.inner(*args, **kwds)
76 @wraps(func)
77 def inner(*args, **kwds):
78 with self._recreate_cm():
---> 79 return func(*args, **kwds)
File ~/miniforge3/lib/python3.9/site-packages/pandas_profiling/visualisation/plot.py:294, in scatter_pairwise(config, series1, series2, x_label, y_label)
290 plt.ylabel(y_label)
292 color = config.html.style.primary_color
--> 294 indices = (series1.notna()) & (series2.notna())
295 if len(series1) > config.plot.scatter_threshold:
296 cmap = sns.light_palette(color, as_cmap=True)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/ops/common.py:70, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
66 return NotImplemented
68 other = item_from_zerodim(other)
---> 70 return method(self, other)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/arraylike.py:70, in OpsMixin.__and__(self, other)
68 @unpack_zerodim_and_defer("__and__")
69 def __and__(self, other):
---> 70 return self._logical_method(other, operator.and_)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/frame.py:6946, in DataFrame._arith_method(self, other, op)
6943 axis = 1 # only relevant for Series other case
6944 other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],))
-> 6946 self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None)
6948 new_data = self._dispatch_frame_op(other, op, axis=axis)
6949 return self._construct_result(new_data)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/ops/__init__.py:307, in align_method_FRAME(left, right, axis, flex, level)
297 if not left.axes[axis].equals(right.index):
298 warnings.warn(
299 "Automatic reindexing on DataFrame vs Series comparisons "
300 "is deprecated and will raise ValueError in a future version. "
(...)
304 stacklevel=find_stack_level(),
305 )
--> 307 left, right = left.align(
308 right, join="outer", axis=axis, level=level, copy=False
309 )
310 right = _maybe_align_series_as_frame(left, right, axis)
312 return left, right
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/frame.py:4711, in DataFrame.align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
4697 @doc(NDFrame.align, **_shared_doc_kwargs)
4698 def align(
4699 self,
(...)
4709 broadcast_axis: Axis | None = None,
4710 ) -> DataFrame:
-> 4711 return super().align(
4712 other,
4713 join=join,
4714 axis=axis,
4715 level=level,
4716 copy=copy,
4717 fill_value=fill_value,
4718 method=method,
4719 limit=limit,
4720 fill_axis=fill_axis,
4721 broadcast_axis=broadcast_axis,
4722 )
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/generic.py:8878, in NDFrame.align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
8866 return self._align_frame(
8867 other,
8868 join=join,
(...)
8875 fill_axis=fill_axis,
8876 )
8877 elif isinstance(other, ABCSeries):
-> 8878 return self._align_series(
8879 other,
8880 join=join,
8881 axis=axis,
8882 level=level,
8883 copy=copy,
8884 fill_value=fill_value,
8885 method=method,
8886 limit=limit,
8887 fill_axis=fill_axis,
8888 )
8889 else: # pragma: no cover
8890 raise TypeError(f"unsupported type: {type(other)}")
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/generic.py:8995, in NDFrame._align_series(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
8993 lidx, ridx = None, None
8994 if not join_index.equals(other.index):
-> 8995 join_index, lidx, ridx = join_index.join(
8996 other.index, how=join, level=level, return_indexers=True
8997 )
8999 if lidx is not None:
9000 bm_axis = self._get_block_manager_axis(axis)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:216, in _maybe_return_indexers.<locals>.join(self, other, how, level, return_indexers, sort)
207 @functools.wraps(meth)
208 def join(
209 self,
(...)
214 sort: bool = False,
215 ):
--> 216 join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
217 if not return_indexers:
218 return join_index
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:4396, in Index.join(self, other, how, level, return_indexers, sort)
4394 this = self.astype(dtype, copy=False)
4395 other = other.astype(dtype, copy=False)
-> 4396 return this.join(other, how=how, return_indexers=True)
4398 _validate_join_method(how)
4400 if not self.is_unique and not other.is_unique:
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:216, in _maybe_return_indexers.<locals>.join(self, other, how, level, return_indexers, sort)
207 @functools.wraps(meth)
208 def join(
209 self,
(...)
214 sort: bool = False,
215 ):
--> 216 join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
217 if not return_indexers:
218 return join_index
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:4406, in Index.join(self, other, how, level, return_indexers, sort)
4403 if self.is_monotonic and other.is_monotonic:
4404 if self._can_use_libjoin:
4405 # otherwise we will fall through to _join_via_get_indexer
-> 4406 return self._join_monotonic(other, how=how)
4407 else:
4408 return self._join_non_unique(other, how=how)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:4754, in Index._join_monotonic(self, other, how)
4752 join_array, lidx, ridx = self._inner_indexer(other)
4753 elif how == "outer":
-> 4754 join_array, lidx, ridx = self._outer_indexer(other)
4756 join_index = self._wrap_joined_index(join_array, other)
4758 lidx = None if lidx is None else ensure_platform_int(lidx)
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:358, in Index._outer_indexer(self, other)
356 sv = self._get_engine_target()
357 ov = other._get_engine_target()
--> 358 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
359 joined = self._from_join_target(joined_ndarray)
360 return joined, lidx, ridx
File ~/miniforge3/lib/python3.9/site-packages/pandas/_libs/join.pyx:575, in pandas._libs.join.outer_join_indexer()
TypeError: '<' not supported between instances of 'str' and 'int'
pandas-profiling version
3.3.0
OS
macOS
Checklist
- [X] There is not yet another bug report for this issue in the issue tracker
- [X] The problem is reproducible from this bug report. This guide can help to craft a minimal bug report.
- [X] The issue has not been resolved by the entries listed under Common Issues.