databricks-sql-python
databricks-sql-python copied to clipboard
Enable `delta.feature.allowColumnDefaults` for all tables
Resolves #324
MCVE
from datetime import datetime as DateTime
from typing import TypeAlias
import sqlalchemy as sa
from databricks.sqlalchemy import TIMESTAMP
from sqlalchemy.orm import (
DeclarativeBase,
declared_attr,
Mapped,
mapped_column,
MappedAsDataclass,
)
from sqlalchemy.types import (
Double,
String,
)
from typing_extensions import Annotated
float64: TypeAlias = Double
uint64: TypeAlias = sa.BigInteger
str32 = Annotated[str, 32]
str64 = Annotated[str, 64]
TABLE_NAME_MAPPING = {
'Model': 'model_metadata',
}
class Base(MappedAsDataclass, DeclarativeBase):
@declared_attr.directive
def __tablename__(cls) -> str:
return TABLE_NAME_MAPPING[cls.__name__]
type_annotation_map = {
float64: Double,
uint64: sa.BigInteger,
str32: String(32),
str64: String(64),
}
insert_timestamp: Mapped[DateTime] = mapped_column(
TIMESTAMP,
init=False,
nullable=False,
server_default=sa.func.current_timestamp(),
)
class Model(Base):
__table_args__ = (
sa.UniqueConstraint("name", "version", name='unique_key'),
)
pkid: Mapped[uint64] = mapped_column(
sa.BigInteger,
sa.Identity(always=True),
init=False,
primary_key=True,
)
name: Mapped[str64]
version: Mapped[str32]
I thought I'd get the ball rolling. Feel free to pick this up / push any changes you want.
This seems to work - the tables have the correct TBLPROPERTIES DDL emitted :
2024-02-02 15:00:09,243 INFO sqlalchemy.engine.Engine CREATE SCHEMA IF NOT EXISTS metadata
2024-02-02 15:00:09,244 INFO sqlalchemy.engine.Engine [no key 0.00171s] {}
Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead.
Databricks does not support unique constraints
2024-02-02 15:00:09,391 INFO sqlalchemy.engine.Engine
CREATE TABLE metadata.model_metadata (
pkid BIGINT GENERATED ALWAYS AS IDENTITY,
name STRING NOT NULL,
version STRING NOT NULL,
insert_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
PRIMARY KEY (pkid)
) USING DELTA
TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'enabled')
2024-02-02 15:00:09,393 INFO sqlalchemy.engine.Engine [no key 0.00164s] {}
...and are able to be created with the correct properties:
SHOW CREATE TABLE model_metadata;
CREATE TABLE `catalog`.metadata.model_metadata (
pkid BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1),
name STRING NOT NULL,
version STRING NOT NULL,
insert_timestamp TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT `model_metadata_pk` PRIMARY KEY (`pkid`))
USING delta
TBLPROPERTIES (
'delta.checkpoint.writeStatsAsJson' = 'false',
'delta.checkpoint.writeStatsAsStruct' = 'true',
'delta.enableDeletionVectors' = 'true',
'delta.feature.allowColumnDefaults' = 'supported',
'delta.feature.deletionVectors' = 'supported',
'delta.feature.identityColumns' = 'supported',
'delta.feature.invariants' = 'supported',
'delta.minReaderVersion' = '3',
'delta.minWriterVersion' = '7')