awkward icon indicating copy to clipboard operation
awkward copied to clipboard

`ak.to_cudf` test failing (`test_3051_to_cuda.py::test_strings`)

Open ikrommyd opened this issue 8 months ago • 3 comments

awkward v2.8.4

cudf-cu12   25.6.0
FAILED tests-cuda/test_3051_to_cuda.py::test_strings - TypeError: StringColumn.__init__() missing 2 required positional arguments: 'size' and 'dtype'

ikrommyd avatar Apr 24 '25 16:04 ikrommyd

    def test_strings():
        arr = ak.Array(["hey", "hi", "hum"])
>       out = ak.to_cudf(arr)
              ^^^^^^^^^^^^^^^

arr        = <Array ['hey', 'hi', 'hum'] type='3 * string'>

tests-cuda/test_3051_to_cuda.py:52: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/_dispatch.py:41: in dispatch
    with OperationErrorContext(name, args, kwargs):
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        args       = (<Array ['hey', 'hi', 'hum'] type='3 * string'>,)
        dispatch   = <function to_cudf at 0x1496b7ff02c0>
        func       = <function to_cudf at 0x1496b7ff0220>
        kwargs     = {}
        name       = 'ak.to_cudf'
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/_errors.py:80: in __exit__
    raise self.decorate_exception(exception_type, exception_value)
        exception_type = <class 'TypeError'>
        exception_value = TypeError("StringColumn.__init__() missing 2 required positional arguments: 'size' and 'dtype'")
        self       = <awkward._errors.OperationErrorContext object at 0x149416fe3890>
        traceback  = <traceback object at 0x149417096800>
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/_dispatch.py:42: in dispatch
    gen_or_result = func(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^
        args       = (<Array ['hey', 'hi', 'hum'] type='3 * string'>,)
        dispatch   = <function to_cudf at 0x1496b7ff02c0>
        func       = <function to_cudf at 0x1496b7ff0220>
        kwargs     = {}
        name       = 'ak.to_cudf'
/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/operations/ak_to_cudf.py:22: in to_cudf
    return cudf.Series._from_column(array.layout._to_cudf(cudf, None, len(array)))
                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        array      = <Array ['hey', 'hi', 'hum'] type='3 * string'>
        cudf       = <module 'cudf' from '/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/cudf/__init__.py'>
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <ListOffsetArray len='3'>
    <parameter name='__array__'>'string'</parameter>
    <offsets><Index dtype='int64' len='4'>
        [0 3 5 8]
    </Index></offsets>
    <content><NumpyArray dtype='uint8' len='8'>
        <parameter name='__array__'>'char'</parameter>
        [104 101 121 104 105 104 117 109]
    </NumpyArray></content>
</ListOffsetArray>
cudf = <module 'cudf' from '/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/cudf/__init__.py'>
mask = None, length = 3

    def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
        from packaging.version import parse as parse_version
    
        cupy = Cupy.instance()
        index = materialize_if_virtual(self._offsets.raw(cupy))[0].astype("int32")
        buf = cudf.core.buffer.as_buffer(index)
    
        if parse_version(cudf.__version__) >= parse_version("24.10.00"):
            ind_buf = cudf.core.column.numerical.NumericalColumn(
                data=buf, dtype=index.dtype, mask=None, size=len(index)
            )
        else:
            ind_buf = cudf.core.column.numerical.NumericalColumn(
                buf, index.dtype, None, size=len(index)
            )
        cont = self._content._to_cudf(cudf, None, len(self._content))
        if mask is not None:
            m = np._module.packbits(mask, bitorder="little")
            if m.nbytes % 64:
                m = cupy.resize(m, ((m.nbytes // 64) + 1) * 64)
            m = cudf.core.buffer.as_buffer(cupy.asarray(m))
        else:
            m = None
        if self.parameters.get("__array__") == "string":
            from cudf.core.column.string import StringColumn
    
            data = cudf.core.buffer.as_buffer(cupy.asarray(self._content.data))
            # docs for StringColumn says there should be two children instead of a data=
>           return StringColumn(
                data=data,
                children=(ind_buf,),
                mask=m,
            )
E           TypeError: StringColumn.__init__() missing 2 required positional arguments: 'size' and 'dtype'
E           
E           This error occurred while calling
E           
E               ak.to_cudf(
E                   <Array ['hey', 'hi', 'hum'] type='3 * string'>
E               )

StringColumn = <class 'cudf.core.column.string.StringColumn'>
buf        = Buffer(owner=<cudf.core.buffer.buffer.BufferOwner object at 0x149418180750>, offset=0, size=16)
cont       = <cudf.core.column.numerical.NumericalColumn object at 0x14941719f6b0>
[
  104,
  101,
  121,
  104,
  105,
  104,
  117,
  109
]
dtype: uint8
cudf       = <module 'cudf' from '/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/cudf/__init__.py'>
cupy       = <awkward._nplikes.cupy.Cupy object at 0x14944e7602f0>
data       = Buffer(owner=<cudf.core.buffer.buffer.BufferOwner object at 0x14941703f5f0>, offset=0, size=8)
ind_buf    = <cudf.core.column.numerical.NumericalColumn object at 0x1496d029a250>
[
  0,
  3,
  5,
  8
]
dtype: int32
index      = array([0, 3, 5, 8], dtype=int32)
length     = 3
m          = None
mask       = None
parse_version = <function parse at 0x1496d0d22ca0>
self       = <ListOffsetArray len='3'>
    <parameter name='__array__'>'string'</parameter>
    <offsets><Index dtype='int64' len='4'>
        [0 3 5 8]
    </Index></offsets>
    <content><NumpyArray dtype='uint8' len='8'>
        <parameter name='__array__'>'char'</parameter>
        [104 101 121 104 105 104 117 109]
    </NumpyArray></content>
</ListOffsetArray>

/home/ar1092/micromamba/envs/test-env/lib/python3.13/site-packages/awkward/contents/listoffsetarray.py:2030: TypeError

ianna avatar Jun 14 '25 14:06 ianna

@ianna It should be an easy fix. It's just that the StringColumn constructor has changed: https://github.com/rapidsai/cudf/blob/d4961e1df7807fbacb1cfd49394cb4b8508d9a82/python/cudf/cudf/core/column/string.py#L92-L100

I just don't have a GPU atm to test :)

ikrommyd avatar Jun 16 '25 18:06 ikrommyd

> @ianna It should be an easy fix. It's just that the StringColumn constructor has changed: https://github.com/rapidsai/cudf/blob/d4961e1df7807fbacb1cfd49394cb4b8508d9a82/python/cudf/cudf/core/column/string.py#L92-L100
>
> I just don't have a GPU atm to test :)

I agree. Thanks.

ianna avatar Jun 16 '25 18:06 ianna