awkward
awkward copied to clipboard
`ak.to_cudf` test failing (`test_3051_to_cuda.py::test_strings`)
awkward v2.8.4
cudf-cu12 25.6.0
FAILED tests-cuda/test_3051_to_cuda.py::test_strings - TypeError: StringColumn.__init__() missing 2 required positional arguments: 'size' and 'dtype'
def test_strings():
arr = ak.Array(["hey", "hi", "hum"])
> out = ak.to_cudf(arr)
^^^^^^^^^^^^^^^
arr = <Array ['hey', 'hi', 'hum'] type='3 * string'>
tests-cuda/test_3051_to_cuda.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/_dispatch.py:41: in dispatch
with OperationErrorContext(name, args, kwargs):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
args = (<Array ['hey', 'hi', 'hum'] type='3 * string'>,)
dispatch = <function to_cudf at 0x1496b7ff02c0>
func = <function to_cudf at 0x1496b7ff0220>
kwargs = {}
name = 'ak.to_cudf'
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/_errors.py:80: in __exit__
raise self.decorate_exception(exception_type, exception_value)
exception_type = <class 'TypeError'>
exception_value = TypeError("StringColumn.__init__() missing 2 required positional arguments: 'size' and 'dtype'")
self = <awkward._errors.OperationErrorContext object at 0x149416fe3890>
traceback = <traceback object at 0x149417096800>
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/_dispatch.py:42: in dispatch
gen_or_result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
args = (<Array ['hey', 'hi', 'hum'] type='3 * string'>,)
dispatch = <function to_cudf at 0x1496b7ff02c0>
func = <function to_cudf at 0x1496b7ff0220>
kwargs = {}
name = 'ak.to_cudf'
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/operations/ak_to_cudf.py:22: in to_cudf
return cudf.Series._from_column(array.layout._to_cudf(cudf, None, len(array)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
array = <Array ['hey', 'hi', 'hum'] type='3 * string'>
cudf = <module 'cudf' from '/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/cudf/__init__.py'>
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ListOffsetArray len='3'>
<parameter name='__array__'>'string'</parameter>
<offsets><Index dtype='int64' len='...'__array__'>'char'</parameter>
[104 101 121 104 105 104 117 109]
</NumpyArray></content>
</ListOffsetArray>
cudf = <module 'cudf' from '/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/cudf/__init__.py'>
mask = None, length = 3
def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
from packaging.version import parse as parse_version
cupy = Cupy.instance()
index = materialize_if_virtual(self._offsets.raw(cupy))[0].astype("int32")
buf = cudf.core.buffer.as_buffer(index)
if parse_version(cudf.__version__) >= parse_version("24.10.00"):
ind_buf = cudf.core.column.numerical.NumericalColumn(
data=buf, dtype=index.dtype, mask=None, size=len(index)
)
else:
ind_buf = cudf.core.column.numerical.NumericalColumn(
buf, index.dtype, None, size=len(index)
)
cont = self._content._to_cudf(cudf, None, len(self._content))
if mask is not None:
m = np._module.packbits(mask, bitorder="little")
if m.nbytes % 64:
m = cupy.resize(m, ((m.nbytes // 64) + 1) * 64)
m = cudf.core.buffer.as_buffer(cupy.asarray(m))
else:
m = None
if self.parameters.get("__array__") == "string":
from cudf.core.column.string import StringColumn
data = cudf.core.buffer.as_buffer(cupy.asarray(self._content.data))
# docs for StringColumn says there should be two children instead of a data=
> return StringColumn(
data=data,
children=(ind_buf,),
mask=m,
)
E TypeError: StringColumn.__init__() missing 2 required positional arguments: 'size' and 'dtype'
E
E This error occurred while calling
E
E ak.to_cudf(
E <Array ['hey', 'hi', 'hum'] type='3 * string'>
E )
StringColumn = <class 'cudf.core.column.string.StringColumn'>
buf = Buffer(owner=<cudf.core.buffer.buffer.BufferOwner object at 0x149418180750>, offset=0, size=16)
cont = <cudf.core.column.numerical.NumericalColumn object at 0x14941719f6b0>
[
104,
101,
121,
104,
105,
104,
117,
109
]
dtype: uint8
cudf = <module 'cudf' from '/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/cudf/__init__.py'>
cupy = <awkward._nplikes.cupy.Cupy object at 0x14944e7602f0>
data = Buffer(owner=<cudf.core.buffer.buffer.BufferOwner object at 0x14941703f5f0>, offset=0, size=8)
ind_buf = <cudf.core.column.numerical.NumericalColumn object at 0x1496d029a250>
[
0,
3,
5,
8
]
dtype: int32
index = array([0, 3, 5, 8], dtype=int32)
length = 3
m = None
mask = None
parse_version = <function parse at 0x1496d0d22ca0>
self = <ListOffsetArray len='3'>
<parameter name='__array__'>'string'</parameter>
<offsets><Index dtype='int64' len='4'>
[0 3 5 8]
</Index></offsets>
<content><NumpyArray dtype='uint8' len='8'>
<parameter name='__array__'>'char'</parameter>
[104 101 121 104 105 104 117 109]
</NumpyArray></content>
</ListOffsetArray>
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/contents/listoffsetarray.py:2030: TypeError
@ianna It should be an easy fix. It's just that the StringColumn constructor has changed: https://github.com/rapidsai/cudf/blob/d4961e1df7807fbacb1cfd49394cb4b8508d9a82/python/cudf/cudf/core/column/string.py#L92-L100
I just don't have a GPU atm to test :)
@ianna It should be an easy fix. It's just that the `StringColumn` constructor has changed: https://github.com/rapidsai/cudf/blob/d4961e1df7807fbacb1cfd49394cb4b8508d9a82/python/cudf/cudf/core/column/string.py#L92-L100
I just don't have a GPU atm to test :)
I agree. Thanks.