erlexec
erlexec copied to clipboard
Handle `None` in columns defined in storage backends
@jjlatval mentioned that providing a `None`
value in columns can cause unexpected behaviours.
The following snippet can be executed successfully:
from docarray import Document, DocumentArray

# Weaviate-backed DocumentArray; `columns` declares `rank` and `year` as int.
DA_CONFIG = {
    "storage": "weaviate",
    "config": {
        "name": "Persisted",
        "host": "localhost",
        "port": 8080,
        "vector_cache_max_objects": 3000000,
        "columns": [("rank", "int"), ("year", "int")]
    }
}

da = DocumentArray(**DA_CONFIG)

# Every Document supplies an int for both declared columns.
docs = [Document(text="Some text here", rank=2, year=2020),
        Document(text="Another kind of text here", rank=1, year=2022),
        Document(text="This document is bad", rank=2, year=3)]

# Start from an empty collection, then store the Documents.
with da:
    da.clear()
    da.extend(docs)

# Select `tags__rank` across all stored Documents.
da[:, 'tags__rank']
But when a value is `None`, we can't index into a Document that contains `None`
in a column that was declared with a specific datatype.
from docarray import Document, DocumentArray

# Weaviate-backed DocumentArray; `columns` declares `rank` and `year` as int.
DA_CONFIG = {
    "storage": "weaviate",
    "config": {
        "name": "Persisted",
        "host": "localhost",
        "port": 8080,
        "vector_cache_max_objects": 3000000,
        "columns": [("rank", "int"), ("year", "int")]
    }
}

da = DocumentArray(**DA_CONFIG)

# The last Document passes rank=None although `rank` is declared as int;
# this is the case being reported.
docs = [Document(text="Some text here", rank=2, year=2020),
        Document(text="Another kind of text here", rank=1, year=2022),
        Document(text="This document is bad", rank=None, year=3)]

# Start from an empty collection, then store the Documents.
with da:
    da.clear()
    da.extend(docs)

# this will break
da[:, 'tags__rank']