sqlmesh icon indicating copy to clipboard operation
sqlmesh copied to clipboard

Accessing the schema of an upstream model in a SQL model defined with Python

Open ananis25 opened this issue 7 months ago • 6 comments

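Accessing the schema of an upstream model (via `evaluator.columns_to_types`) doesn't work for SQL models defined with Python (`@model(..., is_sql=True)`): the returned mapping does not contain the parent's column, so indexing it raises a `KeyError`. Here is a code repro.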

import os
from pathlib import Path
from tempfile import TemporaryDirectory

from sqlmesh.core.config import (
    Config,
    GatewayConfig,
    ModelDefaultsConfig,
    DuckDBConnectionConfig,
)
from sqlmesh.core.context import Context

# Scratch project directory for the reproduction. The TemporaryDirectory
# object is kept in `folder` and is not used as a context manager; nothing in
# this script calls `folder.cleanup()` explicitly.
folder = TemporaryDirectory()
tmp_path = Path(folder.name)
print("TemporaryDirectory: ", tmp_path)

# Upstream model, written to disk below as models/first.sql: a VIEW named
# FEATURES.FIRST with grain ID that selects two literal columns, ID and EMAIL
# (both spelled in upper case here).
# NOTE(review): the EMAIL literal looks like an email-protection placeholder
# left by the page this snippet was copied from, not the author's original
# value -- confirm before reusing the script.
first_model_definition = """
MODEL (
    name FEATURES.FIRST,
    kind VIEW,
    grain ID,
);

SELECT
    1 AS ID, /* Customer ID */
    '[email protected]' AS EMAIL, /* Email address of the customer */
"""

# Downstream model, written to disk below as models/second.py: a Python file
# that declares FEATURES.SECOND through the `@model` decorator with
# `is_sql=True`, kind FULL, and `depends_on` set to the upstream model.
# Its entrypoint asks the evaluator for the upstream model's column types and
# indexes the result with the lower-case key "id"; that lookup is the line the
# reported traceback ends on (KeyError: 'id').
# NOTE(review): `dict_types["id"].Type` may be reading the nested `Type` enum
# class rather than the column's own type, in which case the comparison with
# `exp.DataType.Type.TEXT` could never be true -- confirm against sqlglot.
second_model_definition = """
from sqlglot import exp
from sqlmesh.core.macros import MacroEvaluator
from sqlmesh.core.model import model
from sqlmesh.core.model.kind import ModelKindName

PARENT_MODELS = ["FEATURES.FIRST"]

@model(
    "FEATURES.SECOND",
    is_sql=True,
    kind={"name": ModelKindName.FULL},
    depends_on=PARENT_MODELS,
)
def entrypoint(evaluator: MacroEvaluator) -> exp.Expression:
    dict_types = evaluator.columns_to_types(PARENT_MODELS[0])
    print(dict_types)
    if dict_types["id"].Type == exp.DataType.Type.TEXT:
        return exp.select("'1'::TEXT as col")
    else:
        return exp.select("1 as col")
"""

# Lay out the project on disk: a `models/` folder holding the SQL model and
# the Python-defined SQL model.
os.makedirs(tmp_path / "models", exist_ok=True)
first_model_path = tmp_path / "models" / "first.sql"
first_model_path.write_text(first_model_definition)
second_model_path = tmp_path / "models" / "second.py"
second_model_path.write_text(second_model_definition)

# Single "main" gateway backed by a DuckDB database file inside the temp
# directory; models default to the duckdb dialect.
db_path = str(tmp_path / "repro_db.db")
config = Config(
    gateways={
        "main": GatewayConfig(connection=DuckDBConnectionConfig(database=db_path))
    },
    model_defaults=ModelDefaultsConfig(dialect="duckdb"),
)
# Build a context over the temp project, then run a plan with auto-apply
# enabled and prompts disabled so the script runs unattended.
context = Context(paths=tmp_path, config=config)
context.plan(auto_apply=True, no_prompts=True)

And the error traceback:

Traceback (most recent call last):

  File "/home/ubuntu/repos/sqlmesh-repro/.venv/lib/python3.12/site-packages/sqlmesh/core/macros.py", line 219, in send
    return func(*bound.args, **bound.kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File 'models/second.py' (or imported file), line 4, in entrypoint
    def entrypoint(evaluator: MacroEvaluator):
        dict_types = evaluator.columns_to_types(PARENT_MODELS[0])
        print(dict_types)
        if dict_types['id'].Type == exp.DataType.Type.TEXT:


KeyError: 'id'

ananis25 avatar Jul 13 '24 05:07 ananis25