Repository: dask-sql

[BUG] Lowercase column names can be selected from table but mixed-case columns cannot

Status: Open. Opened by beckernick 3 years ago • 0 comments

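When a column name contains any non-lowercase characters, the parser fails to resolve it. It looks like the parser converts the column name from the query to lowercase before looking it up, while the schema keeps the original casing (the error below searches for "titlecase" although the valid field is "df.Titlecase").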

import pandas as pd
from dask_sql import Context
import dask  # NOTE(review): not referenced anywhere else in this snippet
# Minimal reproduction: register a pandas DataFrame and select its columns by name.
c = Context()
# Three columns whose names differ only in casing style.
df = pd.DataFrame({"lower":range(1000), "camelCase": range(1000), "Titlecase":range(1000)})
# Expose the DataFrame to SQL under the table name "df".
c.create_table("df", df)
# Case 1: all-lowercase column name -- succeeds and prints the first five rows.
query = """
SELECT lower
FROM df
LIMIT 5
"""
print(c.sql(query).compute())
# Case 2: capitalized column name -- raises ParsingException (FieldNotFound for "titlecase").
query = """
SELECT Titlecase
FROM df
LIMIT 5
"""
print(c.sql(query).compute())
# Case 3 (left disabled by the reporter): camelCase column; presumably fails the same way -- TODO confirm.
# query = """
# SELECT camelCase
# FROM df
# LIMIT 5
# """
# print(c.sql(query).compute())
   lower
0      0
1      1
2      2
3      3
4      4

---------------------------------------------------------------------------
ParsingException                          Traceback (most recent call last)
Input In [1], in <cell line: 23>()
     16 print(c.sql(query).compute())
     18 query = """
     19 SELECT Titlecase
     20 FROM df
     21 LIMIT 5
     22 """
---> 23 print(c.sql(query).compute())

File ~/miniconda3/envs/rapids-22.10-dasksql/lib/python3.9/site-packages/dask_sql/context.py:493, in Context.sql(self, sql, return_futures, dataframes, gpu, config_options)
    490         self.create_table(df_name, df, gpu=gpu)
    492 if isinstance(sql, str):
--> 493     rel, _ = self._get_ral(sql)
    494 elif isinstance(sql, LogicalPlan):
    495     rel = sql

File ~/miniconda3/envs/rapids-22.10-dasksql/lib/python3.9/site-packages/dask_sql/context.py:811, in Context._get_ral(self, sql)
    809     nonOptimizedRel = self.context.logical_relational_algebra(sqlTree[0])
    810 except DFParsingException as pe:
--> 811     raise ParsingException(sql, str(pe)) from None
    813 # Optimize the `LogicalPlan` or skip if configured
    814 if dask_config.get("sql.optimize"):

ParsingException: SchemaError(FieldNotFound { qualifier: None, name: "titlecase", valid_fields: Some(["df.lower", "df.camelCase", "df.Titlecase"]) })
# packages in environment at /home/nicholasb/miniconda3/envs/rapids-22.10-dasksql:
cucim                     22.10.00a220930 cuda_11_py39_gf4229e3_51    rapidsai-nightly
cudf                      22.10.00a220929 cuda_11_py39_g5fad28942e_286    rapidsai-nightly
cudf_kafka                22.10.00a220929 py39_g920b58f948_288    rapidsai-nightly
cugraph                   22.10.00a220930 cuda11_py39_g91598080_88    rapidsai-nightly
cuml                      22.10.00a220930 cuda11_py39_g96da84cc1_50    rapidsai-nightly
cusignal                  22.10.00a220930 py39_gd075e87_12    rapidsai-nightly
cuspatial                 22.10.00a220930 py39_g6922ef5_55    rapidsai-nightly
custreamz                 22.10.00a220929 py39_g920b58f948_288    rapidsai-nightly
cuxfilter                 22.10.00a220930 py39_ge1aa0b2_17    rapidsai-nightly
dask                      2022.7.1           pyhd8ed1ab_0    conda-forge
dask-core                 2022.7.1           pyhd8ed1ab_0    conda-forge
dask-cuda                 22.10.00a220930 py39_gc0ae66c_20    rapidsai-nightly
dask-cudf                 22.10.00a220929 cuda_11_py39_g920b58f948_288    rapidsai-nightly
dask-sql                  2022.9.1a220928 py39_ga7583b5_13    dask/label/dev
datashader                0.13.1a                    py_0    rapidsai-nightly
libcucim                  22.10.00a220930 cuda11_gf4229e3_51    rapidsai-nightly
libcudf                   22.10.00a220929 cuda11_g920b58f948_288    rapidsai-nightly
libcudf_kafka             22.10.00a220929 g920b58f948_288    rapidsai-nightly
libcugraph                22.10.00a220930 cuda11_g91598080_88    rapidsai-nightly
libcugraph_etl            22.10.00a220930 cuda11_g91598080_88    rapidsai-nightly
libcugraphops             22.10.00a220930 cuda11_g553bacf_29    rapidsai-nightly
libcuml                   22.10.00a220930 cuda11_g96da84cc1_50    rapidsai-nightly
libcumlprims              22.10.00a220804 cuda11_g2adfe69_0    rapidsai-nightly
libcuspatial              22.10.00a220930 cuda11_g6922ef5_55    rapidsai-nightly
libraft-distance          22.10.00a220930 cuda11_g2e98138c_57    rapidsai-nightly
libraft-headers           22.10.00a220930 cuda11_g2e98138c_57    rapidsai-nightly
libraft-nn                22.10.00a220930 cuda11_g2e98138c_57    rapidsai-nightly
librmm                    22.10.00a220929 cuda11_g8a3a552e_28    rapidsai-nightly
libxgboost                1.6.2dev.rapidsai22.10       cuda_11_0    rapidsai-nightly
ptxcompiler               0.6.0           cuda_11_py39_g455bc7f_2    rapidsai-nightly
py-xgboost                1.6.2dev.rapidsai22.10  cuda_11_py39_0    rapidsai-nightly
pylibcugraph              22.10.00a220930 cuda11_py39_g91598080_88    rapidsai-nightly
pylibraft                 22.10.00a220930 cuda11_py39_g2e98138c_57    rapidsai-nightly
raft-dask                 22.10.00a220930 cuda11_py39_g2e98138c_57    rapidsai-nightly
rapids                    22.10.00a220930 cuda11_py39_gbce77c5_69    rapidsai-nightly
rapids-xgboost            22.10.00a220930 cuda11_py39_gbce77c5_69    rapidsai-nightly
rmm                       22.10.00a220929 cuda11_py39_g8a3a552e_28    rapidsai-nightly
ucx-proc                  1.0.0                       gpu    rapidsai-nightly
ucx-py                    0.28.00a220927  py39_g2ab6070_27    rapidsai-nightly
xgboost                   1.6.2dev.rapidsai22.10  cuda_11_py39_0    rapidsai-nightly

beckernick · Sep 30 '22 20:09