ibis
bug: ArrowTypeError when showing data with 'UUID' object
What happened?
Issue 1: duckdb produces a different UUID for each row, but sqlite generates the same UUID for every row; other backends may have the same issue.
import ibis
ibis.options.interactive = True
from ibis.expr.api import row_number, uuid, now, pi
ibis.set_backend("sqlite")
t = ibis.examples.penguins.fetch()
t.mutate(uuid=ibis.uuid()).to_pandas()
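For comparison, here is a minimal sketch of the same snippet with only the backend swapped to duckdb (output not captured here); based on the behaviour described above, I'd expect each row to get a distinct UUID:

import ibis
ibis.options.interactive = True
ibis.set_backend("duckdb")
t = ibis.examples.penguins.fetch()
# unlike the sqlite output further below, each row should get its own UUID
t.mutate(uuid=ibis.uuid()).to_pandas()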
Issue 2: an ArrowTypeError is raised when displaying the data interactively:
import ibis
ibis.options.interactive = True
from ibis.expr.api import row_number, uuid, now, pi
ibis.set_backend("sqlite")
t = ibis.examples.penguins.fetch()
t1 = t.mutate(my_uuid=uuid())
t1[t1.my_uuid].head()
Got the following error:
Out[7]: ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /Users/voltrondata/repos/ibis/ibis/expr/types/relations.py:516 in __interactive_rich_console__ │
│ │
│ 513 │ │ │ width = options.max_width │
│ 514 │ │ │
│ 515 │ │ try: │
│ ❱ 516 │ │ │ table = to_rich_table(self, width) │
│ 517 │ │ except Exception as e: │
│ 518 │ │ │ # In IPython exceptions inside of _repr_mimebundle_ are swallowed to │
│ 519 │ │ │ # allow calling several display functions and choosing to display │
│ │
│ /Users/voltrondata/repos/ibis/ibis/expr/types/pretty.py:265 in to_rich_table │
│ │
│ 262 │ │
│ 263 │ # Compute the data and return a pandas dataframe │
│ 264 │ nrows = ibis.options.repr.interactive.max_rows │
│ ❱ 265 │ result = table.limit(nrows + 1).to_pyarrow() │
│ 266 │ │
│ 267 │ # Now format the columns in order, stopping if the console width would │
│ 268 │ # be exceeded. │
│ │
│ /Users/voltrondata/repos/ibis/ibis/expr/types/core.py:425 in to_pyarrow │
│ │
│ 422 │ │ Table │
│ 423 │ │ │ A pyarrow table holding the results of the executed expression. │
│ 424 │ │ """ │
│ ❱ 425 │ │ return self._find_backend(use_default=True).to_pyarrow( │
│ 426 │ │ │ self, params=params, limit=limit, **kwargs │
│ 427 │ │ ) │
│ 428 │
│ │
│ /Users/voltrondata/repos/ibis/ibis/backends/__init__.py:218 in to_pyarrow │
│ │
│ 215 │ │ table_expr = expr.as_table() │
│ 216 │ │ schema = table_expr.schema() │
│ 217 │ │ arrow_schema = schema.to_pyarrow() │
│ ❱ 218 │ │ with self.to_pyarrow_batches( │
│ 219 │ │ │ table_expr, params=params, limit=limit, **kwargs │
│ 220 │ │ ) as reader: │
│ 221 │ │ │ table = pa.Table.from_batches(reader, schema=arrow_schema) │
│ │
│ /Users/voltrondata/repos/ibis/ibis/backends/sqlite/__init__.py:264 in to_pyarrow_batches │
│ │
│ 261 │ │ │ self.compile(expr, limit=limit, params=params) │
│ 262 │ │ ) as cursor: │
│ 263 │ │ │ df = self._fetch_from_cursor(cursor, schema) │
│ ❱ 264 │ │ table = pa.Table.from_pandas( │
│ 265 │ │ │ df, schema=schema.to_pyarrow(), preserve_index=False │
│ 266 │ │ ) │
│ 267 │ │ return table.to_reader(max_chunksize=chunk_size) │
│ │
│ in pyarrow.lib.Table.from_pandas:3874 │
│ │
│ /Users/claypot/miniconda3/envs/ibis-dev-arm64/lib/python3.11/site-packages/pyarrow/pandas_compat │
│ .py:611 in dataframe_to_arrays │
│ │
│ 608 │ │ │ │ issubclass(arr.dtype.type, np.integer)) │
│ 609 │ │
│ 610 │ if nthreads == 1: │
│ ❱ 611 │ │ arrays = [convert_column(c, f) │
│ 612 │ │ │ │ for c, f in zip(columns_to_convert, convert_fields)] │
│ 613 │ else: │
│ 614 │ │ arrays = [] │
│ │
│ /Users/claypot/miniconda3/envs/ibis-dev-arm64/lib/python3.11/site-packages/pyarrow/pandas_compat │
│ .py:611 in <listcomp> │
│ │
│ 608 │ │ │ │ issubclass(arr.dtype.type, np.integer)) │
│ 609 │ │
│ 610 │ if nthreads == 1: │
│ ❱ 611 │ │ arrays = [convert_column(c, f) │
│ 612 │ │ │ │ for c, f in zip(columns_to_convert, convert_fields)] │
│ 613 │ else: │
│ 614 │ │ arrays = [] │
│ │
│ /Users/claypot/miniconda3/envs/ibis-dev-arm64/lib/python3.11/site-packages/pyarrow/pandas_compat │
│ .py:598 in convert_column │
│ │
│ 595 │ │ │ │ pa.ArrowTypeError) as e: │
│ 596 │ │ │ e.args += ("Conversion failed for column {!s} with type {!s}" │
│ 597 │ │ │ │ │ .format(col.name, col.dtype),) │
│ ❱ 598 │ │ │ raise e │
│ 599 │ │ if not field_nullable and result.null_count > 0: │
│ 600 │ │ │ raise ValueError("Field {} was non-nullable but pandas column " │
│ 601 │ │ │ │ │ │ │ "had {} null values".format(str(field), │
│ │
│ /Users/claypot/miniconda3/envs/ibis-dev-arm64/lib/python3.11/site-packages/pyarrow/pandas_compat │
│ .py:592 in convert_column │
│ │
│ 589 │ │ │ type_ = field.type │
│ 590 │ │ │
│ 591 │ │ try: │
│ ❱ 592 │ │ │ result = pa.array(col, type=type_, from_pandas=True, safe=safe) │
│ 593 │ │ except (pa.ArrowInvalid, │
│ 594 │ │ │ │ pa.ArrowNotImplementedError, │
│ 595 │ │ │ │ pa.ArrowTypeError) as e: │
│ │
│ in pyarrow.lib.array:340 │
│ │
│ in pyarrow.lib._ndarray_to_array:86 │
│ │
│ in pyarrow.lib.check_status:91 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ArrowTypeError: ("Expected bytes, got a 'UUID' object", 'Conversion failed for column my_uuid with type object')
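The failure seems to come from the pandas column holding Python uuid.UUID objects while the arrow schema expects a string type. A minimal sketch of what I think is the underlying conversion (my guess at the root cause, not a confirmed diagnosis):

import uuid
import pyarrow as pa
# passing uuid.UUID objects where a string type is expected reproduces the error:
# ArrowTypeError: Expected bytes, got a 'UUID' object
pa.array([uuid.uuid4()], type=pa.string(), from_pandas=True)
# converting the values to str first succeeds
pa.array([str(uuid.uuid4())], type=pa.string(), from_pandas=True)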
to_pandas(), however, works fine:
In [8]: t1[t1.my_uuid].to_pandas()
Out[8]:
my_uuid
0 3f661a76-2d0e-4622-862e-1c4adcfd4813
1 3f661a76-2d0e-4622-862e-1c4adcfd4813
2 3f661a76-2d0e-4622-862e-1c4adcfd4813
3 3f661a76-2d0e-4622-862e-1c4adcfd4813
4 3f661a76-2d0e-4622-862e-1c4adcfd4813
.. ...
339 3f661a76-2d0e-4622-862e-1c4adcfd4813
340 3f661a76-2d0e-4622-862e-1c4adcfd4813
341 3f661a76-2d0e-4622-862e-1c4adcfd4813
342 3f661a76-2d0e-4622-862e-1c4adcfd4813
343 3f661a76-2d0e-4622-862e-1c4adcfd4813
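A quick check of the sqlite behaviour from Issue 1 (a sketch; given the output above I'd expect it to report a single distinct value):

# count distinct UUID values produced by the sqlite backend
t1.my_uuid.nunique().to_pandas()
# expected: 1, i.e. the same UUID was generated for every row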
What version of ibis are you using?
8.0.0
What backend(s) are you using, if any?
duckdb, sqlite
Relevant log output
No response
Code of Conduct
- [X] I agree to follow this project's Code of Conduct