[BUG] datetime comparison is sensitive to ordering with different precision
Describe the bug Comparison of datetime objects are sensitive to run order when dtype differs.
Steps/Code to reproduce bug
import numpy as np
import cudf
dt1 = np.datetime64('2024-03-04T18:24:35.67') # ms
dt2 = np.datetime64('2024-03-04T18:24:35.670870310') # ns
cudf.Series([dt1]) > dt2 # Works
cudf.Series([dt2]) > dt1 # Works
dt1 < cudf.Series([dt2]) # Works
dt2 < cudf.Series([dt1]) # Fails with: `ValueError: Converting an integer to a NumPy datetime requires a specified unit`
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[12], line 11
9 cudf.Series([dt2]) > dt1 # Works
10 dt1 < cudf.Series([dt2]) # Works
---> 11 dt2 < cudf.Series([dt1]) # Fails
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/indexed_frame.py:4828, in IndexedFrame.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
4827 def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-> 4828 ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
4829 fname = ufunc.__name__
4831 if ret is not None:
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/utils/performance_tracking.py:51, in _performance_tracking.<locals>.wrapper(*args, **kwargs)
43 if nvtx.enabled():
44 stack.enter_context(
45 nvtx.annotate(
46 message=func.__qualname__,
(...)
49 )
50 )
---> 51 return func(*args, **kwargs)
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/frame.py:1540, in Frame.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
1538 @_performance_tracking
1539 def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-> 1540 return _array_ufunc(self, ufunc, method, inputs, kwargs)
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/utils/utils.py:92, in _array_ufunc(obj, ufunc, method, inputs, kwargs)
90 if fname == "float_power":
91 return getattr(obj, op)(other).astype(float)
---> 92 return getattr(obj, op)(other)
94 # Special handling for various unary operations.
95 if fname == "negative":
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/mixins/mixin_factory.py:11, in _partialmethod.<locals>.wrapper(self, *args2, **kwargs2)
10 def wrapper(self, *args2, **kwargs2):
---> 11 return method(self, *args1, *args2, **kwargs1, **kwargs2)
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/indexed_frame.py:4804, in IndexedFrame._binaryop(self, other, op, fill_value, can_reindex, *args, **kwargs)
4797 return NotImplemented
4799 level_names = (
4800 self._data._level_names if can_use_self_column_name else None
4801 )
4802 return self._from_data(
4803 ColumnAccessor(
-> 4804 type(self)._colwise_binop(operands, op),
4805 level_names=level_names,
4806 ),
4807 index=out_index,
4808 )
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/utils/performance_tracking.py:51, in _performance_tracking.<locals>.wrapper(*args, **kwargs)
43 if nvtx.enabled():
44 stack.enter_context(
45 nvtx.annotate(
46 message=func.__qualname__,
(...)
49 )
50 )
---> 51 return func(*args, **kwargs)
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/frame.py:1528, in Frame._colwise_binop(cls, operands, fn)
1520 assert False, "At least one operand must be a column."
1522 # TODO: Disable logical and binary operators between columns that
1523 # are not numerical using the new binops mixin.
1525 outcol = (
1526 getattr(operator, fn)(right_column, left_column)
1527 if reflect
-> 1528 else getattr(operator, fn)(left_column, right_column)
1529 )
1531 if output_mask is not None:
1532 outcol = outcol.set_mask(output_mask)
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/mixins/mixin_factory.py:11, in _partialmethod.<locals>.wrapper(self, *args2, **kwargs2)
10 def wrapper(self, *args2, **kwargs2):
---> 11 return method(self, *args1, *args2, **kwargs1, **kwargs2)
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/column/datetime.py:628, in DatetimeColumn._binaryop(self, other, op)
626 def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
627 reflect, op = self._check_reflected_op(op)
--> 628 other = self._wrap_binop_normalization(other)
629 if other is NotImplemented:
630 return NotImplemented
File ~/projects/holoviz/repos/holoviews/.pixi/envs/test-gpu/lib/python3.12/site-packages/cudf/core/column/column.py:583, in ColumnBase._wrap_binop_normalization(self, other)
580 return cudf.Scalar(other, dtype=self.dtype)
581 if isinstance(other, np.ndarray) and other.ndim == 0:
582 # Try and maintain the dtype
--> 583 other = other.dtype.type(other.item())
584 return self.normalize_binop_value(other)
ValueError: Converting an integer to a NumPy datetime requires a specified unit
Expected behavior A clear and concise description of what you expected to happen.
Environment overview (please complete the following information)
- Environment location: Bare-metal
- Method of cuDF install: conda
Environment details
Please run and paste the output of the cudf/print_env.sh script here, to gather any other relevant environment details
Additional context Add any other context about the problem here.
Thanks for reporting this. From some quick digging, it looks like the issue here is that numpy's datetime object upcasts to an array and attempts to use the __array_ufunc__ overload when the datetime is the LHS. That is why we get different results here depending on the argument ordering. We can avoid the confusing ordering behavior by wrapping the date objects in np.array, i.e.
cudf.Series([dt1]) > np.array([dt2]) # Also fails
cudf.Series([dt2]) > np.array([dt1]) # Also fails
We'll have to do a bit more digging to determine a fix. I expect that we need to add some extra preprocessing to handle the array case instead of the scalar case.