graphrag
graphrag copied to clipboard
[Feature Request]: Unable to read the created vector database; the collection has to be rebuilt every time.
Do you need to file an issue?
- [X] I have searched the existing issues and this feature is not already filed.
- [X] My model is hosted on OpenAI or Azure. If not, please look at the "model providers" issue and don't file a new one here.
- [X] I believe this is a legitimate feature request, not just a question. If this is a question, please use the Discussions area.
Is your feature request related to a problem? Please describe.
As shown in the code below, every query execution rebuilds the vector store and generates a new local file.
def store_entity_semantic_embeddings(
    entities: list[Entity],
    vectorstore: BaseVectorStore,
) -> BaseVectorStore:
    """Load every entity's description embedding into ``vectorstore``.

    One ``VectorStoreDocument`` is built per entity from its id, description
    and description embedding. The document attributes always carry the
    entity title; when the entity has attributes of its own they are merged
    in after the title. All documents are handed to
    ``vectorstore.load_documents`` in a single call, and the same
    ``vectorstore`` object is returned.
    """
    docs = []
    for item in entities:
        if item.attributes:
            # Entity attributes are unpacked last, so they win on key clashes.
            metadata = {"title": item.title, **item.attributes}
        else:
            metadata = {"title": item.title}
        docs.append(
            VectorStoreDocument(
                id=item.id,
                text=item.description,
                vector=item.description_embedding,
                attributes=metadata,
            )
        )

    vectorstore.load_documents(documents=docs)
    return vectorstore
def load_documents(
    self, documents: list[VectorStoreDocument], overwrite: bool = True
) -> None:
    """Write ``documents`` to the table named ``self.collection_name``.

    Documents whose ``vector`` is ``None`` are skipped. Each remaining
    document becomes one row with ``id``, ``text``, ``vector`` and its
    attributes serialized as a JSON string.

    With ``overwrite`` true (the default) the table is created in
    ``"overwrite"`` mode: from the rows when there are any, otherwise from
    the explicit schema alone. With ``overwrite`` false the existing table
    is opened and the rows, if any, are added to it.

    In every case the resulting table handle is stored on
    ``self.document_collection``.
    """
    rows = []
    for doc in documents:
        if doc.vector is None:
            continue
        rows.append({
            "id": doc.id,
            "text": doc.text,
            "vector": doc.vector,
            "attributes": json.dumps(doc.attributes),
        })
    # An empty batch is represented as None, matching the checks below.
    payload = rows or None

    table_schema = pa.schema([
        pa.field("id", pa.string()),
        pa.field("text", pa.string()),
        pa.field("vector", pa.list_(pa.float64())),
        pa.field("attributes", pa.string()),
    ])

    if not overwrite:
        # add data to existing table
        self.document_collection = self.db_connection.open_table(
            self.collection_name
        )
        if payload:
            self.document_collection.add(payload)
        return

    if payload:
        self.document_collection = self.db_connection.create_table(
            self.collection_name, data=payload, mode="overwrite"
        )
    else:
        # No rows to write, so the table is created from the schema instead.
        self.document_collection = self.db_connection.create_table(
            self.collection_name, schema=table_schema, mode="overwrite"
        )
Describe the solution you'd like
It should be possible to connect to an existing vector database and directly retrieve all the documents inside the collection, without rebuilding it.
Additional context
If necessary, I can provide a specific implementation.