SQLite.jl
SQLite.jl copied to clipboard
[BUG] Query Not Running Using SQLite.jl - Runs within `litecli`
So, I have the following SQL query:
DELETE FROM "COHORT"
WHERE cohort_definition_id = 1;
INSERT INTO "COHORT"
SELECT
1 AS "cohort_definition_id",
"drug_era_7"."subject_id",
"drug_era_7"."cohort_start_date",
"drug_era_7"."cohort_end_date"
FROM (
SELECT
"drug_era_6"."person_id" AS "subject_id",
MIN("drug_era_6"."start_date") AS "cohort_start_date",
MAX("drug_era_6"."end_date") AS "cohort_end_date"
FROM (
SELECT
"drug_era_5"."person_id",
(SUM("drug_era_5"."bump") OVER (PARTITION BY "drug_era_5"."person_id" ORDER BY "drug_era_5"."start_date", (- "drug_era_5"."bump") ROWS UNBOUNDED PRECEDING)) AS "group",
"drug_era_5"."start_date",
"drug_era_5"."end_date"
FROM (
SELECT
"drug_era_4"."person_id",
(CASE WHEN ("drug_era_4"."start_date" <= (MAX("drug_era_4"."op_end_date") OVER (PARTITION BY "drug_era_4"."person_id" ORDER BY "drug_era_4"."start_date" ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))) THEN 0 ELSE 1 END) AS "bump",
"drug_era_4"."start_date",
"drug_era_4"."op_end_date" AS "end_date"
FROM (
SELECT
"drug_era_3"."person_id",
"drug_era_3"."start_date",
"drug_era_3"."op_end_date",
(ROW_NUMBER() OVER (PARTITION BY "drug_era_3"."person_id" ORDER BY "drug_era_3"."start_date")) AS "row_number"
FROM (
SELECT
"drug_era_2"."person_id",
"drug_era_2"."start_date",
"op_1"."end_date" AS "op_end_date",
(ROW_NUMBER() OVER (PARTITION BY "drug_era_2"."person_id" ORDER BY "drug_era_2"."sort_date")) AS "row_number"
FROM (
SELECT
"drug_era_1"."person_id",
"drug_era_1"."drug_era_start_date" AS "start_date",
"drug_era_1"."drug_era_start_date" AS "sort_date"
FROM ""."drug_era" AS "drug_era_1"
WHERE ("drug_era_1"."drug_concept_id" IN (
SELECT "concept_1"."concept_id"
FROM ""."concept" AS "concept_1"
WHERE ("concept_1"."concept_id" = 1118084)
))
) AS "drug_era_2"
JOIN (
SELECT
"observation_period_1"."person_id",
"observation_period_1"."observation_period_end_date" AS "end_date",
"observation_period_1"."observation_period_start_date" AS "start_date"
FROM ""."observation_period" AS "observation_period_1"
) AS "op_1" ON ("drug_era_2"."person_id" = "op_1"."person_id")
WHERE
("op_1"."start_date" <= "drug_era_2"."start_date") AND
("drug_era_2"."start_date" <= "op_1"."end_date")
) AS "drug_era_3"
WHERE ("drug_era_3"."row_number" = 1)
) AS "drug_era_4"
WHERE ("drug_era_4"."row_number" = 1)
) AS "drug_era_5"
) AS "drug_era_6"
GROUP BY
"drug_era_6"."person_id",
"drug_era_6"."group"
) AS "drug_era_7";
I have a SQLite.DB set up and try to run this SQL as follows:
DBInterface.execute(db, my_sql) # Does not work
SQLite.execute(db, my_sql) # Does not work
However, when I run this exact same SQL within the tool, litecli, it works as expected in deleting and creating rows. What is going on here?
Thanks!
~ tcp :deciduous_tree:
P.S. If you want to reproduce this behavior locally on your machine, try the following:
using HealthSampleData
using DataFrames
using SQLite
using DBInterface
eunomia = Eunomia()
conn = SQLite.DB(eunomia)
sql = """
DELETE FROM "COHORT"
WHERE cohort_definition_id = 1;
INSERT INTO "COHORT"
SELECT
1 AS "cohort_definition_id",
"drug_era_7"."subject_id",
"drug_era_7"."cohort_start_date",
"drug_era_7"."cohort_end_date"
FROM (
SELECT
"drug_era_6"."person_id" AS "subject_id",
MIN("drug_era_6"."start_date") AS "cohort_start_date",
MAX("drug_era_6"."end_date") AS "cohort_end_date"
FROM (
SELECT
"drug_era_5"."person_id",
(SUM("drug_era_5"."bump") OVER (PARTITION BY "drug_era_5"."person_id" ORDER BY "drug_era_5"."start_date", (- "drug_era_5"."bump") ROWS UNBOUNDED PRECEDING)) AS "group",
"drug_era_5"."start_date",
"drug_era_5"."end_date"
FROM (
SELECT
"drug_era_4"."person_id",
(CASE WHEN ("drug_era_4"."start_date" <= (MAX("drug_era_4"."op_end_date") OVER (PARTITION BY "drug_era_4"."person_id" ORDER BY "drug_era_4"."start_date" ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))) THEN 0 ELSE 1 END) AS "bump",
"drug_era_4"."start_date",
"drug_era_4"."op_end_date" AS "end_date"
FROM (
SELECT
"drug_era_3"."person_id",
"drug_era_3"."start_date",
"drug_era_3"."op_end_date",
(ROW_NUMBER() OVER (PARTITION BY "drug_era_3"."person_id" ORDER BY "drug_era_3"."start_date")) AS "row_number"
FROM (
SELECT
"drug_era_2"."person_id",
"drug_era_2"."start_date",
"op_1"."end_date" AS "op_end_date",
(ROW_NUMBER() OVER (PARTITION BY "drug_era_2"."person_id" ORDER BY "drug_era_2"."sort_date")) AS "row_number"
FROM (
SELECT
"drug_era_1"."person_id",
"drug_era_1"."drug_era_start_date" AS "start_date",
"drug_era_1"."drug_era_start_date" AS "sort_date"
FROM ""."drug_era" AS "drug_era_1"
WHERE ("drug_era_1"."drug_concept_id" IN (
SELECT "concept_1"."concept_id"
FROM ""."concept" AS "concept_1"
WHERE ("concept_1"."concept_id" = 1118084)
))
) AS "drug_era_2"
JOIN (
SELECT
"observation_period_1"."person_id",
"observation_period_1"."observation_period_end_date" AS "end_date",
"observation_period_1"."observation_period_start_date" AS "start_date"
FROM ""."observation_period" AS "observation_period_1"
) AS "op_1" ON ("drug_era_2"."person_id" = "op_1"."person_id")
WHERE
("op_1"."start_date" <= "drug_era_2"."start_date") AND
("drug_era_2"."start_date" <= "op_1"."end_date")
) AS "drug_era_3"
WHERE ("drug_era_3"."row_number" = 1)
) AS "drug_era_4"
WHERE ("drug_era_4"."row_number" = 1)
) AS "drug_era_5"
) AS "drug_era_6"
GROUP BY
"drug_era_6"."person_id",
"drug_era_6"."group"
) AS "drug_era_7";
"""
DBInterface.execute(conn, sql)
DBInterface.execute(conn, "SELECT * FROM COHORT LIMIT 5;") |> DataFrame
# Results in empty dataframe - should not be empty
Because your SQL includes multiple statements, and we haven't implemented DBInterface.executemultiple correctly so that the default behavior is only executing the first statement.
A workaround is to execute single statement sequentially.
We can also implement DBInterface.executemultiple correctly with the method described in SQLite forum which is similar with executescript in Python.
All C API functions in SQLite have been exposed now so you can propose a PR if you are interested in it.