dask-sql
dask-sql copied to clipboard
[BUG] Lowercase column names can be selected from table but mixed-case columns cannot
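When a column name contains any non-lowercase characters, the parser fails to resolve it. It looks like the parser converts unquoted column names to lowercase before looking them up in the schema, while the schema itself keeps the original case: in the error below, the lookup is for "titlecase" even though the valid fields include "df.Titlecase".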
import pandas as pd
from dask_sql import Context
import dask
c = Context()
df = pd.DataFrame({"lower":range(1000), "camelCase": range(1000), "Titlecase":range(1000)})
c.create_table("df", df)
query = """
SELECT lower
FROM df
LIMIT 5
"""
print(c.sql(query).compute())
query = """
SELECT Titlecase
FROM df
LIMIT 5
"""
print(c.sql(query).compute())
# query = """
# SELECT camelCase
# FROM df
# LIMIT 5
# """
# print(c.sql(query).compute())
lower
0 0
1 1
2 2
3 3
4 4
---------------------------------------------------------------------------
ParsingException Traceback (most recent call last)
Input In [1], in <cell line: 23>()
16 print(c.sql(query).compute())
18 query = """
19 SELECT Titlecase
20 FROM df
21 LIMIT 5
22 """
---> 23 print(c.sql(query).compute())
File ~/miniconda3/envs/rapids-22.10-dasksql/lib/python3.9/site-packages/dask_sql/context.py:493, in Context.sql(self, sql, return_futures, dataframes, gpu, config_options)
490 self.create_table(df_name, df, gpu=gpu)
492 if isinstance(sql, str):
--> 493 rel, _ = self._get_ral(sql)
494 elif isinstance(sql, LogicalPlan):
495 rel = sql
File ~/miniconda3/envs/rapids-22.10-dasksql/lib/python3.9/site-packages/dask_sql/context.py:811, in Context._get_ral(self, sql)
809 nonOptimizedRel = self.context.logical_relational_algebra(sqlTree[0])
810 except DFParsingException as pe:
--> 811 raise ParsingException(sql, str(pe)) from None
813 # Optimize the `LogicalPlan` or skip if configured
814 if dask_config.get("sql.optimize"):
ParsingException: SchemaError(FieldNotFound { qualifier: None, name: "titlecase", valid_fields: Some(["df.lower", "df.camelCase", "df.Titlecase"]) })
# packages in environment at /home/nicholasb/miniconda3/envs/rapids-22.10-dasksql:
cucim 22.10.00a220930 cuda_11_py39_gf4229e3_51 rapidsai-nightly
cudf 22.10.00a220929 cuda_11_py39_g5fad28942e_286 rapidsai-nightly
cudf_kafka 22.10.00a220929 py39_g920b58f948_288 rapidsai-nightly
cugraph 22.10.00a220930 cuda11_py39_g91598080_88 rapidsai-nightly
cuml 22.10.00a220930 cuda11_py39_g96da84cc1_50 rapidsai-nightly
cusignal 22.10.00a220930 py39_gd075e87_12 rapidsai-nightly
cuspatial 22.10.00a220930 py39_g6922ef5_55 rapidsai-nightly
custreamz 22.10.00a220929 py39_g920b58f948_288 rapidsai-nightly
cuxfilter 22.10.00a220930 py39_ge1aa0b2_17 rapidsai-nightly
dask 2022.7.1 pyhd8ed1ab_0 conda-forge
dask-core 2022.7.1 pyhd8ed1ab_0 conda-forge
dask-cuda 22.10.00a220930 py39_gc0ae66c_20 rapidsai-nightly
dask-cudf 22.10.00a220929 cuda_11_py39_g920b58f948_288 rapidsai-nightly
dask-sql 2022.9.1a220928 py39_ga7583b5_13 dask/label/dev
datashader 0.13.1a py_0 rapidsai-nightly
libcucim 22.10.00a220930 cuda11_gf4229e3_51 rapidsai-nightly
libcudf 22.10.00a220929 cuda11_g920b58f948_288 rapidsai-nightly
libcudf_kafka 22.10.00a220929 g920b58f948_288 rapidsai-nightly
libcugraph 22.10.00a220930 cuda11_g91598080_88 rapidsai-nightly
libcugraph_etl 22.10.00a220930 cuda11_g91598080_88 rapidsai-nightly
libcugraphops 22.10.00a220930 cuda11_g553bacf_29 rapidsai-nightly
libcuml 22.10.00a220930 cuda11_g96da84cc1_50 rapidsai-nightly
libcumlprims 22.10.00a220804 cuda11_g2adfe69_0 rapidsai-nightly
libcuspatial 22.10.00a220930 cuda11_g6922ef5_55 rapidsai-nightly
libraft-distance 22.10.00a220930 cuda11_g2e98138c_57 rapidsai-nightly
libraft-headers 22.10.00a220930 cuda11_g2e98138c_57 rapidsai-nightly
libraft-nn 22.10.00a220930 cuda11_g2e98138c_57 rapidsai-nightly
librmm 22.10.00a220929 cuda11_g8a3a552e_28 rapidsai-nightly
libxgboost 1.6.2dev.rapidsai22.10 cuda_11_0 rapidsai-nightly
ptxcompiler 0.6.0 cuda_11_py39_g455bc7f_2 rapidsai-nightly
py-xgboost 1.6.2dev.rapidsai22.10 cuda_11_py39_0 rapidsai-nightly
pylibcugraph 22.10.00a220930 cuda11_py39_g91598080_88 rapidsai-nightly
pylibraft 22.10.00a220930 cuda11_py39_g2e98138c_57 rapidsai-nightly
raft-dask 22.10.00a220930 cuda11_py39_g2e98138c_57 rapidsai-nightly
rapids 22.10.00a220930 cuda11_py39_gbce77c5_69 rapidsai-nightly
rapids-xgboost 22.10.00a220930 cuda11_py39_gbce77c5_69 rapidsai-nightly
rmm 22.10.00a220929 cuda11_py39_g8a3a552e_28 rapidsai-nightly
ucx-proc 1.0.0 gpu rapidsai-nightly
ucx-py 0.28.00a220927 py39_g2ab6070_27 rapidsai-nightly
xgboost 1.6.2dev.rapidsai22.10 cuda_11_py39_0 rapidsai-nightly