haystack-core-integrations
fix: add support for custom mapping in ElasticsearchDocumentStore
Fixes: https://github.com/deepset-ai/haystack-core-integrations/issues/655
Added support for custom mapping when creating a new index using ElasticsearchDocumentStore.
Tested the fix using the following code:
"""Example: create an ElasticsearchDocumentStore with a custom index mapping
and run an indexing pipeline against it.

Demonstrates the new `custom_mapping` parameter of ElasticsearchDocumentStore
(fix for deepset-ai/haystack-core-integrations#655): the supplied mapping is
applied when the index is created instead of the default one.
"""

from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever  # noqa: F401 — imported for the query-side example; unused here
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore

# Custom Elasticsearch index mapping:
# - "embedding" as a dense_vector with dot_product similarity (matches the
#   multi-qa-mpnet-base-dot-v1 embedder used below),
# - "content" as full-text,
# - a dynamic template mapping every other string field to keyword.
mappings = {
    "properties": {
        "embedding": {"type": "dense_vector", "index": True, "similarity": "dot_product"},
        "content": {"type": "text"},
    },
    "dynamic_templates": [
        {
            "strings": {
                "path_match": "*",
                "match_mapping_type": "string",
                "mapping": {"type": "keyword"},
            }
        }
    ],
}

# The mapping is passed through `custom_mapping` and used at index creation.
document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200", custom_mapping=mappings)

converter = TextFileToDocument()
splitter = DocumentSplitter()
doc_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
writer = DocumentWriter(document_store)

# Wire up: text file -> split into chunks -> embed -> write to Elasticsearch.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("converter", converter)
indexing_pipeline.add_component("splitter", splitter)
indexing_pipeline.add_component("doc_embedder", doc_embedder)
indexing_pipeline.add_component("writer", writer)
indexing_pipeline.connect("converter", "splitter")
indexing_pipeline.connect("splitter", "doc_embedder")
indexing_pipeline.connect("doc_embedder", "writer")

indexing_pipeline.run({"converter": {"sources": ["filename.txt"]}})