SQLite.jl icon indicating copy to clipboard operation
SQLite.jl copied to clipboard

[BUG] Query Not Running Using SQLite.jl - Runs within `litecli`

Open TheCedarPrince opened this issue 3 years ago • 2 comments

So, I have the following SQL query:

DELETE FROM "COHORT"
WHERE cohort_definition_id = 1;
INSERT INTO "COHORT"
SELECT
  1 AS "cohort_definition_id",
  "drug_era_7"."subject_id",
  "drug_era_7"."cohort_start_date",
  "drug_era_7"."cohort_end_date"
FROM (
  SELECT
    "drug_era_6"."person_id" AS "subject_id",
    MIN("drug_era_6"."start_date") AS "cohort_start_date",
    MAX("drug_era_6"."end_date") AS "cohort_end_date"
  FROM (
    SELECT
      "drug_era_5"."person_id",
      (SUM("drug_era_5"."bump") OVER (PARTITION BY "drug_era_5"."person_id" ORDER BY "drug_era_5"."start_date", (- "drug_era_5"."bump") ROWS UNBOUNDED PRECEDING)) AS "group",
      "drug_era_5"."start_date",
      "drug_era_5"."end_date"
    FROM (
      SELECT
        "drug_era_4"."person_id",
        (CASE WHEN ("drug_era_4"."start_date" <= (MAX("drug_era_4"."op_end_date") OVER (PARTITION BY "drug_era_4"."person_id" ORDER BY "drug_era_4"."start_date" ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))) THEN 0 ELSE 1 END) AS "bump",
        "drug_era_4"."start_date",
        "drug_era_4"."op_end_date" AS "end_date"
      FROM (
        SELECT
          "drug_era_3"."person_id",
          "drug_era_3"."start_date",
          "drug_era_3"."op_end_date",
          (ROW_NUMBER() OVER (PARTITION BY "drug_era_3"."person_id" ORDER BY "drug_era_3"."start_date")) AS "row_number"
        FROM (
          SELECT
            "drug_era_2"."person_id",
            "drug_era_2"."start_date",
            "op_1"."end_date" AS "op_end_date",
            (ROW_NUMBER() OVER (PARTITION BY "drug_era_2"."person_id" ORDER BY "drug_era_2"."sort_date")) AS "row_number"
          FROM (
            SELECT
              "drug_era_1"."person_id",
              "drug_era_1"."drug_era_start_date" AS "start_date",
              "drug_era_1"."drug_era_start_date" AS "sort_date"
            FROM ""."drug_era" AS "drug_era_1"
            WHERE ("drug_era_1"."drug_concept_id" IN (
              SELECT "concept_1"."concept_id"
              FROM ""."concept" AS "concept_1"
              WHERE ("concept_1"."concept_id" = 1118084)
            ))
          ) AS "drug_era_2"
          JOIN (
            SELECT
              "observation_period_1"."person_id",
              "observation_period_1"."observation_period_end_date" AS "end_date",
              "observation_period_1"."observation_period_start_date" AS "start_date"
            FROM ""."observation_period" AS "observation_period_1"
          ) AS "op_1" ON ("drug_era_2"."person_id" = "op_1"."person_id")
          WHERE
            ("op_1"."start_date" <= "drug_era_2"."start_date") AND
            ("drug_era_2"."start_date" <= "op_1"."end_date")
        ) AS "drug_era_3"
        WHERE ("drug_era_3"."row_number" = 1)
      ) AS "drug_era_4"
      WHERE ("drug_era_4"."row_number" = 1)
    ) AS "drug_era_5"
  ) AS "drug_era_6"
  GROUP BY
    "drug_era_6"."person_id",
    "drug_era_6"."group"
) AS "drug_era_7";

I have a SQLite.DB set up and try to run this SQL as follows:

DBInterface.execute(db, my_sql) # Does not work
SQLite.execute(db, my_sql) # Does not work

However, when I run this exact same SQL within the tool, litecli, it works as expected in deleting and creating rows. What is going on here?

Thanks!

~ tcp :deciduous_tree:

TheCedarPrince avatar Sep 24 '22 21:09 TheCedarPrince

P.S. If you want to reproduce this behavior locally on your machine, try the following:

using HealthSampleData
using DataFrames
using SQLite
using DBInterface

eunomia = Eunomia()

conn = SQLite.DB(eunomia)

sql = """
DELETE FROM "COHORT"
WHERE cohort_definition_id = 1;
INSERT INTO "COHORT"
SELECT
  1 AS "cohort_definition_id",
  "drug_era_7"."subject_id",
  "drug_era_7"."cohort_start_date",
  "drug_era_7"."cohort_end_date"
FROM (
  SELECT
    "drug_era_6"."person_id" AS "subject_id",
    MIN("drug_era_6"."start_date") AS "cohort_start_date",
    MAX("drug_era_6"."end_date") AS "cohort_end_date"
  FROM (
    SELECT
      "drug_era_5"."person_id",
      (SUM("drug_era_5"."bump") OVER (PARTITION BY "drug_era_5"."person_id" ORDER BY "drug_era_5"."start_date", (- "drug_era_5"."bump") ROWS UNBOUNDED PRECEDING)) AS "group",
      "drug_era_5"."start_date",
      "drug_era_5"."end_date"
    FROM (
      SELECT
        "drug_era_4"."person_id",
        (CASE WHEN ("drug_era_4"."start_date" <= (MAX("drug_era_4"."op_end_date") OVER (PARTITION BY "drug_era_4"."person_id" ORDER BY "drug_era_4"."start_date" ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))) THEN 0 ELSE 1 END) AS "bump",
        "drug_era_4"."start_date",
        "drug_era_4"."op_end_date" AS "end_date"
      FROM (
        SELECT
          "drug_era_3"."person_id",
          "drug_era_3"."start_date",
          "drug_era_3"."op_end_date",
          (ROW_NUMBER() OVER (PARTITION BY "drug_era_3"."person_id" ORDER BY "drug_era_3"."start_date")) AS "row_number"
        FROM (
          SELECT
            "drug_era_2"."person_id",
            "drug_era_2"."start_date",
            "op_1"."end_date" AS "op_end_date",
            (ROW_NUMBER() OVER (PARTITION BY "drug_era_2"."person_id" ORDER BY "drug_era_2"."sort_date")) AS "row_number"
          FROM (
            SELECT
              "drug_era_1"."person_id",
              "drug_era_1"."drug_era_start_date" AS "start_date",
              "drug_era_1"."drug_era_start_date" AS "sort_date"
            FROM ""."drug_era" AS "drug_era_1"
            WHERE ("drug_era_1"."drug_concept_id" IN (
              SELECT "concept_1"."concept_id"
              FROM ""."concept" AS "concept_1"
              WHERE ("concept_1"."concept_id" = 1118084)
            ))
          ) AS "drug_era_2"
          JOIN (
            SELECT
              "observation_period_1"."person_id",
              "observation_period_1"."observation_period_end_date" AS "end_date",
              "observation_period_1"."observation_period_start_date" AS "start_date"
            FROM ""."observation_period" AS "observation_period_1"
          ) AS "op_1" ON ("drug_era_2"."person_id" = "op_1"."person_id")
          WHERE
            ("op_1"."start_date" <= "drug_era_2"."start_date") AND
            ("drug_era_2"."start_date" <= "op_1"."end_date")
        ) AS "drug_era_3"
        WHERE ("drug_era_3"."row_number" = 1)
      ) AS "drug_era_4"
      WHERE ("drug_era_4"."row_number" = 1)
    ) AS "drug_era_5"
  ) AS "drug_era_6"
  GROUP BY
    "drug_era_6"."person_id",
    "drug_era_6"."group"
) AS "drug_era_7";
"""

DBInterface.execute(conn, sql) 
DBInterface.execute(conn, "SELECT * FROM COHORT LIMIT 5;") |> DataFrame
 # Results in empty dataframe - should not be empty

TheCedarPrince avatar Sep 24 '22 22:09 TheCedarPrince

Because your SQL includes multiple statements, and we haven't implemented DBInterface.executemultiple correctly so that the default behavior is only executing the first statement. A workaround is to execute single statement sequentially. We can also implement DBInterface.executemultiple correctly with the method described in SQLite forum which is similar with executescript in Python. All C API functions in SQLite have been exposed now so you can propose a PR if you are interested in it.

metab0t avatar Sep 25 '22 02:09 metab0t