superduper
superduper copied to clipboard
[MISC] Flatten document encode for UI
Instead of nesting content inside a document, all content should be saved to the _leaves
keyword field. This field is a list; each entry in it may be referred to from elsewhere in the
document by the value of its id field.
An example:
{
"_leaves": [
{
"leaf_type": "component",
"type_id": "datatype",
"cls": "vector",
"module": "superduperdb.components.vector_index",
"dict": {
"shape": 384,
"identifier": "test"
},
"id": "_component/datatype/test"
},
{
"leaf_type": "component",
"type_id": "model",
"cls": "SentenceTransformer",
"module": "superduperdb.ext.sentence_transformers.model",
"id": "_component/model/my-model",
"dict": {
"identifier": "my-model",
"predict_kwargs": {
"show_progress_bar": true
},
"signature": "*args,**kwargs",
"model": "all-MiniLM-L6-v2",
"device": "cpu",
"postprocess": "from superduperdb import code\n\n@code\ndef my_code(x):\n return x.tolist()\n",
"datatype": "_component/datatype/test"
}
},
{
"leaf_type": "component",
"type_id": "listener",
"cls": "Listener",
"module": "superduperdb.components.listener",
"id": "_component/listener/my-listener",
"dict": {
"identifier": "my-listener",
"key": "txt",
"model": "_component/model/my-model",
"select": {
"documents": [],
"query": "docu.find()"
},
"active": true,
"predict_kwargs": {}
}
},
{
"leaf_type": "component",
"type_id": "vector_index",
"cls": "VectorIndex",
"module": "superduperdb.components.vector_index",
"dict": {
"identifier": "test",
"indexing_listener": "_component/listener/my-listener",
"measure": "cosine"
},
"id": "_component/vector_index/test"
}
],
"my_key": "_component/vector_index/test"
}