fakesnow icon indicating copy to clipboard operation
fakesnow copied to clipboard

Fully Support Usage from DBeaver

Open sk- opened this issue 10 months ago • 3 comments

One important aspect to be able to debug tests or to interact with a database while in development is to be able to inspect the data. One tool that is arguable in widespread usage is DBeaver.

This issue will track the progress to fully support the usage of fakesnow as a fake Snowflake database. DBeaver uses the jdbc connector.

Setup: In the driver settings I had to change the url to be: jdbc:snowflake://127.0.0.1:3141/?db=test_db&schema=main&warehouse=test_wh&ssl=false&FAKESNOW_DB_PATH=some_path. Note that localhost won't work as the connector extracts the account name from the host, and for that it requires the host to have dots in its name.

I had to manually create the folder somepath and database somepath/TEST_DB.db, for which I used the duckdb cli.

Dbeaver logs: ~/Library/DBeaverData/workspace6/.metadata

Issues:

  • [x] Support show /* JDBC:DatabaseMetaData.getSchemas() */ schemas in account
  • [x] Support describre for all queries in particular describe show databases does not work. See changes in cursor.py
  • [ ] Incorrect data types shown (see attached image, unfortunately I was not able to find where those types are returned)
Image Image Image

Code changes: Below are the code changes I did locally so that I could connect and execute queries against fakesnow.

server.py
from __future__ import annotations

import gzip
import json
import secrets
from base64 import b64encode
from dataclasses import dataclass
from typing import Any

import snowflake.connector.errors
from starlette.applications import Starlette
from starlette.concurrency import run_in_threadpool
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.routing import Route

from fakesnow.arrow import to_ipc, to_sf
from fakesnow.fakes import FakeSnowflakeConnection
from fakesnow.instance import FakeSnow
from fakesnow.rowtype import describe_as_rowtype

shared_fs = FakeSnow()
sessions: dict[str, FakeSnowflakeConnection] = {}


@dataclass
class ServerError(Exception):
    status_code: int
    code: str
    message: str


async def login_request(request: Request) -> JSONResponse:
    database = request.query_params.get("databaseName")
    schema = request.query_params.get("schemaName")
    body = await request.body()
    if request.headers.get("Content-Encoding") == "gzip":
        body = gzip.decompress(body)
    body_json = json.loads(body)
    print("req", body_json)
    session_params: dict[str, Any] = body_json["data"]["SESSION_PARAMETERS"]
    if db_path := session_params.get("FAKESNOW_DB_PATH"):
        # isolated creates a new in-memory database, rather than using the shared in-memory database
        # so this connection won't share any tables with other connections
        fs = FakeSnow() if db_path == ":isolated:" else FakeSnow(db_path=db_path)
    else:
        # share the in-memory database across connections
        fs = shared_fs
    token = secrets.token_urlsafe(32)
    sessions[token] = fs.connect(database, schema)
    res = JSONResponse({
        "data": {
            "token": token,
            "parameters": [{"name": "AUTOCOMMIT", "value": True}],
        },
        "success": True,
    })
    print("res", res.body)
    return res


async def query_request(request: Request) -> JSONResponse:
    try:
        conn = to_conn(request)

        body = await request.body()
        if request.headers.get("Content-Encoding") == "gzip":
            body = gzip.decompress(body)

        body_json = json.loads(body)
        print("req", body_json)

        sql_text = body_json["sqlText"]
        sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getCatalogs() */ databases in account", "show databases")
        sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getSchemas() */ schemas in account", "show databases")
        # https://docs.snowflake.com/en/sql-reference/sql/show-functions
        sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getProcedures() */ functions in account", "select 'fn' where false")
        sql_text = sql_text.replace("show /* JDBC:DatabaseMetaData.getProcedures() */ procedures in account", "select 'fn' where false")

        try:
            # only a single sql statement is sent at a time by the python snowflake connector
            cur = await run_in_threadpool(conn.cursor().execute, sql_text)
        except snowflake.connector.errors.ProgrammingError as e:
            code = f"{e.errno:06d}"
            return JSONResponse(
                {
                    "data": {
                        "errorCode": code,
                        "sqlState": e.sqlstate,
                    },
                    "code": code,
                    "message": e.msg,
                    "success": False,
                }
            )

        rowtype = describe_as_rowtype(cur._describe_last_sql())  # noqa: SLF001

        if cur._arrow_table:  # noqa: SLF001
            batch_bytes = to_ipc(to_sf(cur._arrow_table, rowtype))  # noqa: SLF001
            rowset_b64 = b64encode(batch_bytes).decode("utf-8")
        else:
            rowset_b64 = ""

        res = JSONResponse(
            {
                "data": {
                    "rowtype": rowtype,
                    "rowsetBase64": rowset_b64,
                    "total": cur._rowcount,  # noqa: SLF001
                    "queryId": cur.sfqid,
                    "queryResultFormat": "arrow",
                },
                "success": True,
            }
        )
        print("res", res.body)
        return res

    except ServerError as e:
        return JSONResponse(
            {"data": None, "code": e.code, "message": e.message, "success": False, "headers": None},
            status_code=e.status_code,
        )


def to_conn(request: Request) -> FakeSnowflakeConnection:
    if not (auth := request.headers.get("Authorization")):
        raise ServerError(status_code=401, code="390103", message="Session token not found in the request data.")

    token = auth[17:-1]

    if not (conn := sessions.get(token)):
        raise ServerError(status_code=401, code="390104", message="User must login again to access the service.")

    return conn


routes = [
    Route(
        "/session/v1/login-request",
        login_request,
        methods=["POST"],
    ),
    Route(
        "/queries/v1/query-request",
        query_request,
        methods=["POST"],
    ),
    Route("/queries/v1/abort-request", lambda _: JSONResponse({"success": True}), methods=["POST"]),
]

app = Starlette(debug=True, routes=routes)
cursor.py
...
    def describe(self, command: str, *args: Any, **kwargs: Any) -> list[ResultMetadata]:
        """Return the schema of the result without executing the query.

        Takes the same arguments as execute

        Returns:
            list[ResultMetadata]: _description_
        """
        if "show databases" in command.lower():
            return [("database", "VARCHAR", "null", "key", "default", "extra")]

        describe = f"DESCRIBE {command}"
        self.execute(describe, *args, **kwargs)
        return describe_as_result_metadata(self.fetchall())

    @property
    def description(self) -> list[ResultMetadata]:
        return describe_as_result_metadata(self._describe_last_sql())

    def _describe_last_sql(self) -> list:
        if "show databases" in self._last_sql.lower():
            return [("database", "VARCHAR", "null", "key", "default", "extra")]
        # use a separate cursor to avoid consuming the result set on this cursor
        with self._conn.cursor() as cur:
            # TODO: can we replace with self._duck_conn.description?
            expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
            cur._execute(expression, self._last_params)  # noqa: SLF001
            return cur.fetchall()

...

sk- avatar Feb 17 '25 20:02 sk-

Autocreate database if file does not exist

@sk- Can you try setting the FAKESNOW_DB_PATH session parameter value to tell fakesnow to use that path for its databases? During login it should create the database in that path if its doesn't exist.

tekumara avatar Mar 09 '25 01:03 tekumara

@tekumara I initially tested the connection using FAKESNOW_DB_PATH as shown in the jdb connection url listed in the OP.

Below is the exception I get when the database is not previously created.

Login Request

{
  'data': {
    'ACCOUNT_NAME': '127',
    'SESSION_PARAMETERS': {'FAKESNOW_DB_PATH': 'some_path'},
    'CLIENT_APP_ID': 'JDBC',
    'EXT_AUTHN_DUO_METHOD': 'push',
    'PASSWORD': 'foo',
    'CLIENT_ENVIRONMENT': {
      'schema': 'main',
      'tracing': 'INFO',
      'OS': 'Mac OS X',
      'OCSP_MODE': 'FAIL_OPEN',
      'JAVA_VM': 'OpenJDK 64-Bit Server VM',
      'APPLICATION': 'DBeaver_DBeaver',
      'warehouse': 'test_wh',
      'JDBC_JAR_NAME': 'snowflake-jdbc-3.14.4',
      'password': '****',
      'database': 'test_db',
      'application': 'DBeaver_DBeaver',
      'OS_VERSION': '15.3.1',
      'serverURL': 'http://127.0.0.1:3141/',
      'JAVA_VERSION': '21.0.5',
      'user': 'goo',
      'account': '127',
      'JAVA_RUNTIME': 'OpenJDK Runtime Environment'
    },
    'CLIENT_APP_VERSION': '3.14.4',
    'LOGIN_NAME': 'goo'
  },
  'inFlightCtx': None
}

Login Exception

INFO:     127.0.0.1:57288 - "POST /session/v1/login-request?databaseName=test_db&schemaName=main&warehouse=test_wh&requestId=88801eb9-1e2f-439c-b72d-f051275eb483&request_guid=fb3d70b1-6085-4bdc-b5d0-de4abead921b HTTP/1.1" 500 Internal Server Error
ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File ".venv/lib/python3.12/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/starlette/applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 187, in __call__
    raise exc
  File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 165, in __call__
    await self.app(scope, receive, _send)
  File ".venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 715, in __call__
    await self.middleware_stack(scope, receive, send)
  File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 735, in app
    await route.handle(scope, receive, send)
  File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 288, in handle
    await self.app(scope, receive, send)
  File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 76, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 73, in app
    response = await f(request)
               ^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/fakesnow/server.py", line 50, in login_request
    sessions[token] = fs.connect(database, schema)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/fakesnow/instance.py", line 86, in connect
    return fakes.FakeSnowflakeConnection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/fakesnow/conn.py", line 66, in __init__
    duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
duckdb.duckdb.IOException: IO Error: Cannot open file "some_path/TEST_DB.db": No such file or directory

sk- avatar Mar 10 '25 12:03 sk-

Thanks @sk- I've created #197 to track this

tekumara avatar Mar 20 '25 03:03 tekumara

@sk- can you elaborate on this:

Incorrect data types shown (see attached image, unfortunately I was not able to find where those types are returned)

The data types I see on a real instance match your screenshot (see below). They aren't something fakesnow returns, so my guess is they are embedded in the JDBC driver.

Image

tekumara avatar Jun 01 '25 00:06 tekumara