xorbits
BUG: cuDF deserialization error when data contains NA values such as np.nan or pd.NA
Note that the issue tracker is NOT the place for general support. For discussions about development, questions about usage, or any general questions, contact us on https://discuss.xorbits.io/.
Minimal Reproduction:
df = md.DataFrame({'a': ["foo", "baz", np.nan]}, chunk_size=1).to_gpu()
res = df.execute().fetch(to_cpu=False)
print(res)
Error:
_mars/dataframe/core.py:845: in fetch
batches = list(self._iter(batch_size=batch_size, session=session, **kw))
_mars/dataframe/core.py:829: in _iter
yield self._fetch(session=session, **kw)
_mars/core/entity/executable.py:169: in _fetch
return fetch(self, session=session, **kw)
_mars/deploy/oscar/session.py:1760: in fetch
return session.fetch(tileable, *tileables, **kwargs)
_mars/deploy/oscar/session.py:1562: in fetch
return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
/home/lichengjie/miniconda3/lib/python3.9/concurrent/futures/_base.py:446: in result
return self.__get_result()
/home/lichengjie/miniconda3/lib/python3.9/concurrent/futures/_base.py:391: in __get_result
raise self._exception
_mars/deploy/oscar/session.py:1728: in _fetch
data = await session.fetch(tileable, *tileables, **kwargs)
_mars/deploy/oscar/tests/session.py:64: in fetch
results = await super().fetch(*tileables, **kwargs)
_mars/deploy/oscar/session.py:1019: in fetch
fetched_data = await fetcher.get()
_mars/services/task/execution/mars/fetcher.py:60: in get
fetched_data = await storage_api.get.batch(
/home/lichengjie/miniconda3/lib/python3.9/site-packages/xoscar/batch.py:149: in _async_batch
return await self.batch_func(args_list, kwargs_list)
_mars/services/storage/api/oscar.py:113: in batch_get
return await self._storage_handler_ref.get.batch(*gets)
/home/lichengjie/miniconda3/lib/python3.9/site-packages/xoscar/backends/context.py:203: in send
result = await self._wait(future, actor_ref.address, send_message) # type: ignore
/home/lichengjie/miniconda3/lib/python3.9/site-packages/xoscar/backends/context.py:92: in _wait
return await future
/home/lichengjie/miniconda3/lib/python3.9/site-packages/xoscar/backends/context.py:83: in _wait
await asyncio.shield(future)
xoscar/serialization/core.pyx:913: in xoscar.serialization.core.deserialize
???
xoscar/serialization/core.pyx:811: in xoscar.serialization.core._deserial_single
???
xoscar/serialization/core.pyx:106: in xoscar.serialization.core.Serializer.deserial
???
/home/lichengjie/miniconda3/lib/python3.9/site-packages/xoscar/serialization/cuda.py:97: in deserial
result = Serializable.device_deserialize(header, buffers)
/home/lichengjie/miniconda3/lib/python3.9/site-packages/cudf/core/abc.py:134: in device_deserialize
frames = [
/home/lichengjie/miniconda3/lib/python3.9/site-packages/cudf/core/abc.py:135: in <listcomp>
cudf.core.buffer.as_device_buffer_like(f) if c else memoryview(f)
/home/lichengjie/miniconda3/lib/python3.9/site-packages/cudf/core/buffer.py:125: in as_device_buffer_like
return Buffer(obj)
/home/lichengjie/miniconda3/lib/python3.9/site-packages/cudf/core/buffer.py:188: in __init__
ptr, size = get_ptr_and_size(np.asarray(buf).__array_interface__)
/home/lichengjie/miniconda3/lib/python3.9/site-packages/cudf/core/buffer.py:320: in get_ptr_and_size
itemsize = cudf.dtype(array_interface["typestr"]).itemsize
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arbitrary = '|S1'
def dtype(arbitrary):
"""
Return the cuDF-supported dtype corresponding to `arbitrary`.
Parameters
----------
arbitrary: dtype or scalar-like
Returns
-------
dtype: the cuDF-supported dtype that best matches `arbitrary`
"""
# first, try interpreting arbitrary as a NumPy dtype that we support:
try:
np_dtype = np.dtype(arbitrary)
if np_dtype.kind in ("OU"):
return np.dtype("object")
except TypeError:
pass
else:
if np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_LIBCUDF_TYPES:
> raise TypeError(f"Unsupported type {np_dtype}")
E TypeError: Unsupported type |S1
/home/lichengjie/miniconda3/lib/python3.9/site-packages/cudf/core/dtypes.py:51: TypeError