ibis
ibis copied to clipboard
bug: `ibis.memtable(df).to_polars()` doesn't round-trip Polars dtypes
What happened?
In [1]: import polars as pl
In [2]: import ibis
In [3]: ibis.set_backend('polars')
In [4]: df = pl.DataFrame({'a': [1,1,2], 'b': ['red', 'red', 'green']}, schema_overrides={'b': pl.Enum(['red', 'green',
...: 'white'])})
In [5]: df
Out[5]:
shape: (3, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ enum  │
╞═════╪═══════╡
│ 1   ┆ red   │
│ 1   ┆ red   │
│ 2   ┆ green │
└─────┴───────┘
In [6]: ibis.memtable(df).to_polars()
Out[6]:
shape: (3, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ str   │
╞═════╪═══════╡
│ 1   ┆ red   │
│ 1   ┆ red   │
│ 2   ┆ green │
└─────┴───────┘
What version of ibis are you using?
10.5.0
What backend(s) are you using, if any?
No response
Relevant log output
Code of Conduct
- [x] I agree to follow this project's Code of Conduct
In this case it seems to be because of enums, which we don't support as a user-facing type. Are there other types where you observe the non-round-trip behavior?
Thanks for your response. The only other one I noticed is `pl.Int128`, which raises a `KeyError` instead of being converted:
In [13]: df = pl.DataFrame({'a': [1,2]}, schema={'a': pl.Int128})
In [14]: ibis.memtable(df).to_polars()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[14], line 1
----> 1 ibis.memtable(df).to_polars()
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/expr/api.py:423, in memtable(data, columns, schema, name)
419 import ibis
421 schema = ibis.schema(schema)
--> 423 return _memtable(data, name=name, schema=schema, columns=columns)
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/common/dispatch.py:115, in lazy_singledispatch.<locals>.call(arg, *args, **kwargs)
112 @functools.wraps(func)
113 def call(arg, *args, **kwargs):
114 impl = dispatcher.dispatch(type(arg))
--> 115 return impl(arg, *args, **kwargs)
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/expr/api.py:572, in _memtable_from_polars_dataframe(data, name, schema, columns)
568 assert schema is None, "if `columns` is not `None` then `schema` must be `None`"
569 schema = sch.Schema(dict(zip(columns, sch.infer(data).values())))
570 return ops.InMemoryTable(
571 name=name if name is not None else util.gen_name("polars_memtable"),
--> 572 schema=sch.infer(data) if schema is None else schema,
573 data=PolarsDataFrameProxy(data),
574 ).to_expr()
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/common/dispatch.py:115, in lazy_singledispatch.<locals>.call(arg, *args, **kwargs)
112 @functools.wraps(func)
113 def call(arg, *args, **kwargs):
114 impl = dispatcher.dispatch(type(arg))
--> 115 return impl(arg, *args, **kwargs)
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/expr/schema.py:379, in infer_polars_dataframe(df)
374 @infer.register("polars.DataFrame")
375 @infer.register("polars.LazyFrame")
376 def infer_polars_dataframe(df):
377 from ibis.formats.polars import PolarsSchema
--> 379 return PolarsSchema.to_ibis(df.collect_schema())
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/formats/polars.py:128, in PolarsSchema.to_ibis(cls, schema)
124 @classmethod
125 def to_ibis(cls, schema: dict[str, pl.DataType]) -> Schema:
126 """Convert a polars schema to a schema."""
127 return Schema.from_tuples(
--> 128 [(name, PolarsType.to_ibis(typ)) for name, typ in schema.items()]
129 )
File ~/polars-api-compat-dev/.venv/lib/python3.12/site-packages/ibis/formats/polars.py:77, in PolarsType.to_ibis(cls, typ, nullable)
72 return dt.Struct.from_tuples(
73 [(field.name, cls.to_ibis(field.dtype)) for field in typ.fields],
74 nullable=nullable,
75 )
76 else:
---> 77 return _from_polars_types[base_type](nullable=nullable)
KeyError: Int128