Fully Support Usage from DBeaver
One important aspect to be able to debug tests or to interact with a database while in development is to be able to inspect the data. One tool that is arguable in widespread usage is DBeaver.
This issue will track the progress to fully support the usage of fakesnow as a fake Snowflake database. DBeaver uses the jdbc connector.
Setup:
In the driver settings I had to change the url to be:
jdbc:snowflake://127.0.0.1:3141/?db=test_db&schema=main&warehouse=test_wh&ssl=false&FAKESNOW_DB_PATH=some_path. Note that localhost won't work as the connector extracts the account name from the host, and for that it requires the host to have dots in its name.
I had to manually create the folder somepath and database somepath/TEST_DB.db, for which I used the duckdb cli.
Dbeaver logs: ~/Library/DBeaverData/workspace6/.metadata
Issues:
- [x] Support
show /* JDBC:DatabaseMetaData.getSchemas() */ schemas in account - [x] Support describre for all queries in particular
describe show databasesdoes not work. See changes incursor.py - [ ] Incorrect data types shown (see attached image, unfortunately I was not able to find where those types are returned)
Code changes: Below are the code changes I did locally so that I could connect and execute queries against fakesnow.
server.py
from __future__ import annotations
import gzip
import json
import secrets
from base64 import b64encode
from dataclasses import dataclass
from typing import Any
import snowflake.connector.errors
from starlette.applications import Starlette
from starlette.concurrency import run_in_threadpool
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.routing import Route
from fakesnow.arrow import to_ipc, to_sf
from fakesnow.fakes import FakeSnowflakeConnection
from fakesnow.instance import FakeSnow
from fakesnow.rowtype import describe_as_rowtype
shared_fs = FakeSnow()
sessions: dict[str, FakeSnowflakeConnection] = {}
@dataclass
class ServerError(Exception):
status_code: int
code: str
message: str
async def login_request(request: Request) -> JSONResponse:
database = request.query_params.get("databaseName")
schema = request.query_params.get("schemaName")
body = await request.body()
if request.headers.get("Content-Encoding") == "gzip":
body = gzip.decompress(body)
body_json = json.loads(body)
print("req", body_json)
session_params: dict[str, Any] = body_json["data"]["SESSION_PARAMETERS"]
if db_path := session_params.get("FAKESNOW_DB_PATH"):
# isolated creates a new in-memory database, rather than using the shared in-memory database
# so this connection won't share any tables with other connections
fs = FakeSnow() if db_path == ":isolated:" else FakeSnow(db_path=db_path)
else:
# share the in-memory database across connections
fs = shared_fs
token = secrets.token_urlsafe(32)
sessions[token] = fs.connect(database, schema)
res = JSONResponse({
"data": {
"token": token,
"parameters": [{"name": "AUTOCOMMIT", "value": True}],
},
"success": True,
})
print("res", res.body)
return res
async def query_request(request: Request) -> JSONResponse:
try:
conn = to_conn(request)
body = await request.body()
if request.headers.get("Content-Encoding") == "gzip":
body = gzip.decompress(body)
body_json = json.loads(body)
print("req", body_json)
sql_text = body_json["sqlText"]
sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getCatalogs() */ databases in account", "show databases")
sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getSchemas() */ schemas in account", "show databases")
# https://docs.snowflake.com/en/sql-reference/sql/show-functions
sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getProcedures() */ functions in account", "select 'fn' where false")
sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getProcedures() */ procedures in account", "select 'fn' where false")
try:
# only a single sql statement is sent at a time by the python snowflake connector
cur = await run_in_threadpool(conn.cursor().execute, sql_text)
except snowflake.connector.errors.ProgrammingError as e:
code = f"{e.errno:06d}"
return JSONResponse(
{
"data": {
"errorCode": code,
"sqlState": e.sqlstate,
},
"code": code,
"message": e.msg,
"success": False,
}
)
rowtype = describe_as_rowtype(cur._describe_last_sql()) # noqa: SLF001
if cur._arrow_table: # noqa: SLF001
batch_bytes = to_ipc(to_sf(cur._arrow_table, rowtype)) # noqa: SLF001
rowset_b64 = b64encode(batch_bytes).decode("utf-8")
else:
rowset_b64 = ""
res = JSONResponse(
{
"data": {
"rowtype": rowtype,
"rowsetBase64": rowset_b64,
"total": cur._rowcount, # noqa: SLF001
"queryId": cur.sfqid,
"queryResultFormat": "arrow",
},
"success": True,
}
)
print("res", res.body)
return res
except ServerError as e:
return JSONResponse(
{"data": None, "code": e.code, "message": e.message, "success": False, "headers": None},
status_code=e.status_code,
)
def to_conn(request: Request) -> FakeSnowflakeConnection:
if not (auth := request.headers.get("Authorization")):
raise ServerError(status_code=401, code="390103", message="Session token not found in the request data.")
token = auth[17:-1]
if not (conn := sessions.get(token)):
raise ServerError(status_code=401, code="390104", message="User must login again to access the service.")
return conn
routes = [
Route(
"/session/v1/login-request",
login_request,
methods=["POST"],
),
Route(
"/queries/v1/query-request",
query_request,
methods=["POST"],
),
Route("/queries/v1/abort-request", lambda _: JSONResponse({"success": True}), methods=["POST"]),
]
app = Starlette(debug=True, routes=routes)
cursor.py
...
def describe(self, command: str, *args: Any, **kwargs: Any) -> list[ResultMetadata]:
"""Return the schema of the result without executing the query.
Takes the same arguments as execute
Returns:
list[ResultMetadata]: _description_
"""
if "show databases" in command.lower():
return [("database", "VARCHAR", "null", "key", "default", "extra")]
describe = f"DESCRIBE {command}"
self.execute(describe, *args, **kwargs)
return describe_as_result_metadata(self.fetchall())
@property
def description(self) -> list[ResultMetadata]:
return describe_as_result_metadata(self._describe_last_sql())
def _describe_last_sql(self) -> list:
if "show databases" in self._last_sql.lower():
return [("database", "VARCHAR", "null", "key", "default", "extra")]
# use a separate cursor to avoid consuming the result set on this cursor
with self._conn.cursor() as cur:
# TODO: can we replace with self._duck_conn.description?
expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
cur._execute(expression, self._last_params) # noqa: SLF001
return cur.fetchall()
...
Autocreate database if file does not exist
@sk- Can you try setting the FAKESNOW_DB_PATH session parameter value to tell fakesnow to use that path for its databases? During login it should create the database in that path if its doesn't exist.
@tekumara I initially tested the connection using FAKESNOW_DB_PATH as shown in the jdb connection url listed in the OP.
Below is the exception I get when the database is not previously created.
Login Request
{
'data': {
'ACCOUNT_NAME': '127',
'SESSION_PARAMETERS': {'FAKESNOW_DB_PATH': 'some_path'},
'CLIENT_APP_ID': 'JDBC',
'EXT_AUTHN_DUO_METHOD': 'push',
'PASSWORD': 'foo',
'CLIENT_ENVIRONMENT': {
'schema': 'main',
'tracing': 'INFO',
'OS': 'Mac OS X',
'OCSP_MODE': 'FAIL_OPEN',
'JAVA_VM': 'OpenJDK 64-Bit Server VM',
'APPLICATION': 'DBeaver_DBeaver',
'warehouse': 'test_wh',
'JDBC_JAR_NAME': 'snowflake-jdbc-3.14.4',
'password': '****',
'database': 'test_db',
'application': 'DBeaver_DBeaver',
'OS_VERSION': '15.3.1',
'serverURL': 'http://127.0.0.1:3141/',
'JAVA_VERSION': '21.0.5',
'user': 'goo',
'account': '127',
'JAVA_RUNTIME': 'OpenJDK Runtime Environment'
},
'CLIENT_APP_VERSION': '3.14.4',
'LOGIN_NAME': 'goo'
},
'inFlightCtx': None
}
Login Exception
INFO: 127.0.0.1:57288 - "POST /session/v1/login-request?databaseName=test_db&schemaName=main&warehouse=test_wh&requestId=88801eb9-1e2f-439c-b72d-f051275eb483&request_guid=fb3d70b1-6085-4bdc-b5d0-de4abead921b HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File ".venv/lib/python3.12/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/starlette/applications.py", line 112, in __call__
await self.middleware_stack(scope, receive, send)
File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 187, in __call__
raise exc
File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File ".venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/fakesnow/server.py", line 50, in login_request
sessions[token] = fs.connect(database, schema)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/fakesnow/instance.py", line 86, in connect
return fakes.FakeSnowflakeConnection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/fakesnow/conn.py", line 66, in __init__
duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
duckdb.duckdb.IOException: IO Error: Cannot open file "some_path/TEST_DB.db": No such file or directory
Thanks @sk- I've created #197 to track this
@sk- can you elaborate on this:
Incorrect data types shown (see attached image, unfortunately I was not able to find where those types are returned)
The data types I see on a real instance match your screenshot (see below). They aren't something fakesnow returns, so my guess is they are embedded in the JDBC driver.