Unable to query JSON arrays
I've got some tables that use JSON fields, and would like to query their contents. The fields have a variable structure, and might contain either arrays or dicts. However, I can only query the top level key of a dict, since somewhere down the line the index accessor gets converted into a parameter that must be a string. This also prevents path index operations, since the SQLAlchemy syntax for doing so is to use a tuple in this place.
Currently, I'm using SQLAlchemy directly as a workaround, but it'd be nice to be able to do this through databases. Is this something that others would be interested in?
MRE:
import asyncio
import databases
import sqlalchemy
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.asyncio import create_async_engine
async def main():
DATABASE_URL = "postgresql+asyncpg://test:tube@localhost/test"
metadata = sqlalchemy.MetaData()
example = sqlalchemy.Table(
"example",
metadata,
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
sqlalchemy.Column("data", JSONB),
)
database = databases.Database(DATABASE_URL)
await database.connect()
query = example.insert()
values = [{"data": [1, 2, 3]}, {"data": {"0": 1}}]
await database.execute_many(query=query, values=values)
# this works
query = example.select().where(example.c.data["0"].as_integer() == 1)
# this throws
# query = example.select().where(example.c.data[0].as_integer() == 1)
await database.fetch_all(query=query)
# using the SQLAlchemy engine directly works
engine = create_async_engine(DATABASE_URL)
async with engine.connect() as conn:
q2 = example.select().where(example.c.data[0].as_integer() == 1)
r = await conn.execute(q2)
res = r.fetchall()
if __name__ == "__main__":
asyncio.run(main())
Traceback:
Traceback (most recent call last):
File "asyncpg/protocol/prepared_stmt.pyx", line 168, in asyncpg.protocol.protocol.PreparedStatementState._encode_bind_msg
File "asyncpg/protocol/codecs/base.pyx", line 206, in asyncpg.protocol.protocol.Codec.encode
File "asyncpg/protocol/codecs/base.pyx", line 111, in asyncpg.protocol.protocol.Codec.encode_scalar
File "asyncpg/pgproto/./codecs/text.pyx", line 29, in asyncpg.pgproto.pgproto.text_encode
File "asyncpg/pgproto/./codecs/text.pyx", line 12, in asyncpg.pgproto.pgproto.as_pg_string_and_size
TypeError: expected str, got int
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/example.py", line 35, in <module>
asyncio.run(main())
File "/opt/tooling/pyenv/versions/3.10.1/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/tooling/pyenv/versions/3.10.1/lib/python3.10/asyncio/base_events.py", line 641, in run_until_complete
return future.result()
File "/example.py", line 31, in main
await database.fetch_all(query=query)
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/databases/core.py", line 149, in fetch_all
return await connection.fetch_all(query, values)
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/databases/core.py", line 271, in fetch_all
return await self._connection.fetch_all(built_query)
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/databases/backends/postgres.py", line 177, in fetch_all
rows = await self._connection.fetch(query_str, *args)
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/asyncpg/connection.py", line 621, in fetch
return await self._execute(
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/asyncpg/connection.py", line 1659, in _execute
result, _ = await self.__execute(
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/asyncpg/connection.py", line 1684, in __execute
return await self._do_execute(
File "/opt/tooling/pyenv/versions/example-env/lib/python3.10/site-packages/asyncpg/connection.py", line 1731, in _do_execute
result = await executor(stmt, None)
File "asyncpg/protocol/protocol.pyx", line 183, in bind_execute
File "asyncpg/protocol/prepared_stmt.pyx", line 197, in asyncpg.protocol.protocol.PreparedStatementState._encode_bind_msg
asyncpg.exceptions.DataError: invalid input for query argument $1: 0 (expected str, got int)
OS: Ubuntu Python Version: 3.10 Databases Version: 0.5.5 Database: postgresql Driver: asyncpg
This is somewhat unhelpful to solve your issue, but I was wondering how can a similar query be executed if the Table is defined declaratively, ie,
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Example(Base):
__tablename__ = "example"
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True)
sqlalchemy.Column("data", JSONB)