dask-sql
[BUG] "SELECT <number> FROM <table> HAVING EVERY(<boolean>)" raises an error on GPU
What happened:
"SELECT <number> FROM <table> HAVING EVERY(<boolean>)" raises an error when the table is created on GPU, while the same query returns a result on CPU.
What you expected to happen:
The query should succeed on GPU as well, without raising an error.
Minimal Complete Verifiable Example:
import pandas as pd
import dask.dataframe as dd
from dask_sql import Context
c = Context()
df0 = pd.DataFrame({
    'c0': ['A'],
    'c1': ['B'],
})
t0 = dd.from_pandas(df0, npartitions=1)
c.create_table('t0', t0, gpu=False)
c.create_table('t0_gpu', t0, gpu=True)
print('CPU Result:')
result1 = c.sql("SELECT 1 FROM t0 HAVING EVERY(true)").compute()
print(result1)
print('GPU Result:')
result2 = c.sql("SELECT 1 FROM t0_gpu HAVING EVERY(true)").compute()
print(result2)
Result:
INFO:numba.cuda.cudadrv.driver:init
CPU Result:
Int64(1)
0 1
GPU Result:
Traceback (most recent call last):
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/utils.py", line 193, in raise_on_meta_error
yield
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6793, in _emulate
return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 1203, in _groupby_apply_funcs
r = func(grouped, **func_kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 1249, in _apply_func_to_column
return func(df_like[column])
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/aggregate.py", line 152, in <lambda>
dd.Aggregation("every", lambda s: s.all(), lambda s0: s0.all())
AttributeError: 'SeriesGroupBy' object has no attribute 'all'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/tmp/bug.py", line 21, in <module>
result2= c.sql("SELECT 1 FROM t0_gpu HAVING EVERY(true)").compute()
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/context.py", line 513, in sql
return self._compute_table_from_rel(rel, return_futures)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/context.py", line 839, in _compute_table_from_rel
dc = RelConverter.convert(rel, context=self)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/convert.py", line 61, in convert
df = plugin_instance.convert(rel, context=context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/project.py", line 28, in convert
(dc,) = self.assert_inputs(rel, 1, context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/base.py", line 84, in assert_inputs
return [RelConverter.convert(input_rel, context) for input_rel in input_rels]
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/base.py", line 84, in <listcomp>
return [RelConverter.convert(input_rel, context) for input_rel in input_rels]
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/convert.py", line 61, in convert
df = plugin_instance.convert(rel, context=context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/filter.py", line 56, in convert
(dc,) = self.assert_inputs(rel, 1, context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/base.py", line 84, in assert_inputs
return [RelConverter.convert(input_rel, context) for input_rel in input_rels]
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/base.py", line 84, in <listcomp>
return [RelConverter.convert(input_rel, context) for input_rel in input_rels]
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/convert.py", line 61, in convert
df = plugin_instance.convert(rel, context=context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/aggregate.py", line 231, in convert
df_agg, output_column_order, cc = self._do_aggregations(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/aggregate.py", line 312, in _do_aggregations
df_result = self._perform_aggregation(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/aggregate.py", line 551, in _perform_aggregation
agg_result = grouped_df.agg(aggregations_dict, **groupby_agg_options)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 374, in wrapper
return func(self, *args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 2884, in agg
return self.aggregate(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/nvtx/nvtx.py", line 101, in inner
result = func(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_cudf/groupby.py", line 218, in aggregate
return super().aggregate(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 2873, in aggregate
return super().aggregate(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 2369, in aggregate
result = aca(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6746, in apply_concat_apply
meta_chunk = _emulate(chunk, *args, udf=True, **chunk_kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6792, in _emulate
with raise_on_meta_error(funcname(func), udf=udf), check_numeric_only_deprecation():
File "/opt/conda/envs/rapids/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/utils.py", line 214, in raise_on_meta_error
raise ValueError(msg) from e
ValueError: Metadata inference failed in `_groupby_apply_funcs`.
You have supplied a custom function and Dask is unable to
determine the type of output that that function returns.
To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.
Original error is below:
------------------------
AttributeError("'SeriesGroupBy' object has no attribute 'all'")
Traceback:
---------
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/utils.py", line 193, in raise_on_meta_error
yield
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6793, in _emulate
return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 1203, in _groupby_apply_funcs
r = func(grouped, **func_kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/groupby.py", line 1249, in _apply_func_to_column
return func(df_like[column])
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/aggregate.py", line 152, in <lambda>
dd.Aggregation("every", lambda s: s.all(), lambda s0: s0.all())
Anything else we need to know?:
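From the traceback, the failure comes from dask_sql/physical/rel/logical/aggregate.py line 152, where EVERY is implemented as a custom aggregation: dd.Aggregation("every", lambda s: s.all(), lambda s0: s0.all()). On the GPU path the chunk function receives a cudf-backed SeriesGroupBy, which apparently does not implement all(), so Dask's metadata inference raises. As a rough sketch of a possible workaround (not a confirmed fix; it assumes the aggregated column is boolean and that grouped min() is supported by both backends), EVERY could be expressed as a boolean minimum:

import dask.dataframe as dd

# Hypothetical alternative "every" aggregation: for boolean values,
# min() is equivalent to a logical AND, and grouped min() is assumed
# to be available on both pandas- and cudf-backed Dask dataframes.
every_agg = dd.Aggregation(
    "every",
    chunk=lambda s: s.min(),  # per-partition AND via boolean min
    agg=lambda s0: s0.min(),  # combine the per-partition partial results
)

Whether this matches the exact NULL semantics of SQL's EVERY would still need to be checked; it is only meant to help narrow down the cause.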
Environment:
- dask-sql version: 2023.6.0
- Python version: Python 3.10.11
- Operating System: Ubuntu 22.04
- Install method (conda, pip, source): Docker, deployed from https://hub.docker.com/layers/rapidsai/rapidsai-dev/23.06-cuda11.8-devel-ubuntu22.04-py3.10/images/sha256-cfbb61fdf7227b090a435a2e758114f3f1c31872ed8dbd96e5e564bb5fd184a7?context=explore