Flowise
Flowise copied to clipboard
[BUG] Unable to use PDF Loader with Weaviate Upsert Document - but other Doc Loaders work.
Describe the bug: When connecting a PDF Document Loader to the Weaviate Upsert Document node, the flow does not create a class/index in Weaviate. On a fresh install of Weaviate, the flow (see the export below) returns the error:
no graphql provider present, this is most likely because no schema is present. Import a schema first!
Alternatively, if my Weaviate instance already contains some indexes, I get the following error, which asks whether I meant an existing, similarly named index rather than the new one I want to create:
Error: Cannot query field "Documents2" on type "GetObjectsObj". Did you mean "Documents"?:
However, if I simply swap out the PDF Loader for another loader (for example, the Cheerio Web Scraper), everything works perfectly and the new class/index is created in Weaviate.
In both examples I have the metadata for the Document Loader set to:
{
"doc": "docname"
}
and the Weaviate settings configured similarly to this:
Weaviate Index Name: Documents2
Weaviate Text Key: text
Weaviate Metadata Keys: ["doc"]
To Reproduce: Steps to reproduce the behavior: I have provided the repro as a flow export JSON below.
Expected behavior: When using any Document Loader, I expect the same behavior, i.e., a new index/class is created in Weaviate and I can converse with the data.
Screenshots
Not working, PDF Loader
Working - Cheerio
{
"nodes": [
{
"width": 300,
"height": 376,
"id": "recursiveCharacterTextSplitter_1",
"position": {
"x": 469.30433129254175,
"y": 111.92838878408651
},
"type": "customNode",
"data": {
"id": "recursiveCharacterTextSplitter_1",
"label": "Recursive Character Text Splitter",
"name": "recursiveCharacterTextSplitter",
"type": "RecursiveCharacterTextSplitter",
"baseClasses": [
"RecursiveCharacterTextSplitter",
"TextSplitter"
],
"category": "Text Splitters",
"description": "Split documents recursively by different characters - starting with \"\n\n\", then \"\n\", then \" \"",
"inputParams": [
{
"label": "Chunk Size",
"name": "chunkSize",
"type": "number",
"default": 1000,
"optional": true,
"id": "recursiveCharacterTextSplitter_1-input-chunkSize-number"
},
{
"label": "Chunk Overlap",
"name": "chunkOverlap",
"type": "number",
"optional": true,
"id": "recursiveCharacterTextSplitter_1-input-chunkOverlap-number"
}
],
"inputAnchors": [],
"inputs": {
"chunkSize": 1000,
"chunkOverlap": ""
},
"outputAnchors": [
{
"id": "recursiveCharacterTextSplitter_1-output-recursiveCharacterTextSplitter-RecursiveCharacterTextSplitter|TextSplitter",
"name": "recursiveCharacterTextSplitter",
"label": "RecursiveCharacterTextSplitter",
"type": "RecursiveCharacterTextSplitter | TextSplitter"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": {
"x": 469.30433129254175,
"y": 111.92838878408651
},
"dragging": false
},
{
"width": 300,
"height": 330,
"id": "openAIEmbeddings_1",
"position": {
"x": 817.2208258595176,
"y": 586.8095386455508
},
"type": "customNode",
"data": {
"id": "openAIEmbeddings_1",
"label": "OpenAI Embeddings",
"name": "openAIEmbeddings",
"type": "OpenAIEmbeddings",
"baseClasses": [
"OpenAIEmbeddings",
"Embeddings"
],
"category": "Embeddings",
"description": "OpenAI API to generate embeddings for a given text",
"inputParams": [
{
"label": "OpenAI Api Key",
"name": "openAIApiKey",
"type": "password",
"id": "openAIEmbeddings_1-input-openAIApiKey-password"
},
{
"label": "Strip New Lines",
"name": "stripNewLines",
"type": "boolean",
"optional": true,
"additionalParams": true,
"id": "openAIEmbeddings_1-input-stripNewLines-boolean"
},
{
"label": "Batch Size",
"name": "batchSize",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "openAIEmbeddings_1-input-batchSize-number"
},
{
"label": "Timeout",
"name": "timeout",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "openAIEmbeddings_1-input-timeout-number"
}
],
"inputAnchors": [],
"inputs": {
"stripNewLines": "",
"batchSize": "",
"timeout": ""
},
"outputAnchors": [
{
"id": "openAIEmbeddings_1-output-openAIEmbeddings-OpenAIEmbeddings|Embeddings",
"name": "openAIEmbeddings",
"label": "OpenAIEmbeddings",
"type": "OpenAIEmbeddings | Embeddings"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": {
"x": 817.2208258595176,
"y": 586.8095386455508
},
"dragging": false
},
{
"width": 300,
"height": 524,
"id": "chatOpenAI_0",
"position": {
"x": 1209.4550736048013,
"y": -14.587340173809139
},
"type": "customNode",
"data": {
"id": "chatOpenAI_0",
"label": "ChatOpenAI",
"name": "chatOpenAI",
"type": "ChatOpenAI",
"baseClasses": [
"ChatOpenAI",
"BaseChatModel",
"BaseLanguageModel",
"BaseLangChain"
],
"category": "Chat Models",
"description": "Wrapper around OpenAI large language models that use the Chat endpoint",
"inputParams": [
{
"label": "OpenAI Api Key",
"name": "openAIApiKey",
"type": "password",
"id": "chatOpenAI_0-input-openAIApiKey-password"
},
{
"label": "Model Name",
"name": "modelName",
"type": "options",
"options": [
{
"label": "gpt-4",
"name": "gpt-4"
},
{
"label": "gpt-4-0314",
"name": "gpt-4-0314"
},
{
"label": "gpt-4-32k-0314",
"name": "gpt-4-32k-0314"
},
{
"label": "gpt-3.5-turbo",
"name": "gpt-3.5-turbo"
},
{
"label": "gpt-3.5-turbo-0301",
"name": "gpt-3.5-turbo-0301"
}
],
"default": "gpt-3.5-turbo",
"optional": true,
"id": "chatOpenAI_0-input-modelName-options"
},
{
"label": "Temperature",
"name": "temperature",
"type": "number",
"default": 0.9,
"optional": true,
"id": "chatOpenAI_0-input-temperature-number"
},
{
"label": "Max Tokens",
"name": "maxTokens",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-maxTokens-number"
},
{
"label": "Top Probability",
"name": "topP",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-topP-number"
},
{
"label": "Frequency Penalty",
"name": "frequencyPenalty",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-frequencyPenalty-number"
},
{
"label": "Presence Penalty",
"name": "presencePenalty",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-presencePenalty-number"
},
{
"label": "Timeout",
"name": "timeout",
"type": "number",
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-timeout-number"
}
],
"inputAnchors": [],
"inputs": {
"modelName": "gpt-3.5-turbo",
"temperature": "0.5",
"maxTokens": "",
"topP": "",
"frequencyPenalty": "",
"presencePenalty": "",
"timeout": ""
},
"outputAnchors": [
{
"id": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel|BaseLangChain",
"name": "chatOpenAI",
"label": "ChatOpenAI",
"type": "ChatOpenAI | BaseChatModel | BaseLanguageModel | BaseLangChain"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": {
"x": 1209.4550736048013,
"y": -14.587340173809139
},
"dragging": false
},
{
"width": 300,
"height": 332,
"id": "conversationalRetrievalQAChain_0",
"position": {
"x": 1627.1855024401737,
"y": 394.42287890442145
},
"type": "customNode",
"data": {
"id": "conversationalRetrievalQAChain_0",
"label": "Conversational Retrieval QA Chain",
"name": "conversationalRetrievalQAChain",
"type": "ConversationalRetrievalQAChain",
"baseClasses": [
"ConversationalRetrievalQAChain",
"BaseChain",
"BaseLangChain"
],
"category": "Chains",
"description": "Document QA - built on RetrievalQAChain to provide a chat history component",
"inputParams": [
{
"label": "System Message",
"name": "systemMessagePrompt",
"type": "string",
"rows": 4,
"additionalParams": true,
"optional": true,
"placeholder": "I want you to act as a document that I am having a conversation with. Your name is \"AI Assistant\". You will provide me with answers from the given info. If the answer is not included, say exactly \"Hmm, I am not sure.\" and stop after that. Refuse to answer any question not about the info. Never break character.",
"id": "conversationalRetrievalQAChain_0-input-systemMessagePrompt-string"
}
],
"inputAnchors": [
{
"label": "Language Model",
"name": "model",
"type": "BaseLanguageModel",
"id": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel"
},
{
"label": "Vector Store Retriever",
"name": "vectorStoreRetriever",
"type": "BaseRetriever",
"id": "conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever"
}
],
"inputs": {
"model": "{{chatOpenAI_0.data.instance}}",
"vectorStoreRetriever": "{{weaviateUpsert_0.data.instance}}"
},
"outputAnchors": [
{
"id": "conversationalRetrievalQAChain_0-output-conversationalRetrievalQAChain-ConversationalRetrievalQAChain|BaseChain|BaseLangChain",
"name": "conversationalRetrievalQAChain",
"label": "ConversationalRetrievalQAChain",
"type": "ConversationalRetrievalQAChain | BaseChain | BaseLangChain"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": {
"x": 1627.1855024401737,
"y": 394.42287890442145
},
"dragging": false
},
{
"width": 300,
"height": 507,
"id": "pdfFile_0",
"position": {
"x": 849.5620088236952,
"y": -17.40479280857062
},
"type": "customNode",
"data": {
"id": "pdfFile_0",
"label": "Pdf File",
"name": "pdfFile",
"type": "Document",
"baseClasses": [
"Document"
],
"category": "Document Loaders",
"description": "Load data from PDF files",
"inputParams": [
{
"label": "Pdf File",
"name": "pdfFile",
"type": "file",
"fileType": ".pdf",
"id": "pdfFile_0-input-pdfFile-file"
},
{
"label": "Usage",
"name": "usage",
"type": "options",
"options": [
{
"label": "One document per page",
"name": "perPage"
},
{
"label": "One document per file",
"name": "perFile"
}
],
"default": "perPage",
"id": "pdfFile_0-input-usage-options"
},
{
"label": "Metadata",
"name": "metadata",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "pdfFile_0-input-metadata-json"
}
],
"inputAnchors": [
{
"label": "Text Splitter",
"name": "textSplitter",
"type": "TextSplitter",
"optional": true,
"id": "pdfFile_0-input-textSplitter-TextSplitter"
}
],
"inputs": {
"textSplitter": "{{recursiveCharacterTextSplitter_1.data.instance}}",
"usage": "perPage",
"metadata": "{\"doc\":\"test\"}"
},
"outputAnchors": [
{
"id": "pdfFile_0-output-pdfFile-Document",
"name": "pdfFile",
"label": "Document",
"type": "Document"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": {
"x": 849.5620088236952,
"y": -17.40479280857062
},
"dragging": false
},
{
"width": 300,
"height": 751,
"id": "weaviateUpsert_0",
"position": {
"x": 1219.7925207643482,
"y": 571.4725224243142
},
"type": "customNode",
"data": {
"id": "weaviateUpsert_0",
"label": "Weaviate Upsert Document",
"name": "weaviateUpsert",
"type": "Weaviate",
"baseClasses": [
"Weaviate",
"VectorStoreRetriever",
"BaseRetriever"
],
"category": "Vector Stores",
"description": "Upsert documents to Weaviate",
"inputParams": [
{
"label": "Weaviate Scheme",
"name": "weaviateScheme",
"type": "options",
"default": "https",
"options": [
{
"label": "https",
"name": "https"
},
{
"label": "http",
"name": "http"
}
],
"id": "weaviateUpsert_0-input-weaviateScheme-options"
},
{
"label": "Weaviate Host",
"name": "weaviateHost",
"type": "string",
"placeholder": "localhost:8080",
"id": "weaviateUpsert_0-input-weaviateHost-string"
},
{
"label": "Weaviate Index",
"name": "weaviateIndex",
"type": "string",
"placeholder": "Test",
"id": "weaviateUpsert_0-input-weaviateIndex-string"
},
{
"label": "Weaviate API Key",
"name": "weaviateApiKey",
"type": "password",
"optional": true,
"id": "weaviateUpsert_0-input-weaviateApiKey-password"
},
{
"label": "Weaviate Text Key",
"name": "weaviateTextKey",
"type": "string",
"placeholder": "text",
"optional": true,
"additionalParams": true,
"id": "weaviateUpsert_0-input-weaviateTextKey-string"
},
{
"label": "Weaviate Metadata Keys",
"name": "weaviateMetadataKeys",
"type": "string",
"rows": 4,
"placeholder": "[\"foo\"]",
"optional": true,
"additionalParams": true,
"id": "weaviateUpsert_0-input-weaviateMetadataKeys-string"
}
],
"inputAnchors": [
{
"label": "Document",
"name": "document",
"type": "Document",
"list": true,
"id": "weaviateUpsert_0-input-document-Document"
},
{
"label": "Embeddings",
"name": "embeddings",
"type": "Embeddings",
"id": "weaviateUpsert_0-input-embeddings-Embeddings"
}
],
"inputs": {
"document": [
"{{pdfFile_0.data.instance}}"
],
"embeddings": "{{openAIEmbeddings_1.data.instance}}",
"weaviateScheme": "https",
"weaviateHost": "weaviate.systems.kubernetes.testing.aws.zen.co.uk/",
"weaviateIndex": "Docs",
"weaviateTextKey": "text",
"weaviateMetadataKeys": "[\"doc\"]"
},
"outputAnchors": [
{
"name": "output",
"label": "Output",
"type": "options",
"options": [
{
"id": "weaviateUpsert_0-output-retriever-Weaviate|VectorStoreRetriever|BaseRetriever",
"name": "retriever",
"label": "Weaviate Retriever",
"type": "Weaviate | VectorStoreRetriever | BaseRetriever"
},
{
"id": "weaviateUpsert_0-output-vectorStore-Weaviate|VectorStore",
"name": "vectorStore",
"label": "Weaviate Vector Store",
"type": "Weaviate | VectorStore"
}
],
"default": "retriever"
}
],
"outputs": {
"output": "retriever"
},
"selected": false
},
"selected": false,
"positionAbsolute": {
"x": 1219.7925207643482,
"y": 571.4725224243142
},
"dragging": false
}
],
"edges": [
{
"source": "chatOpenAI_0",
"sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel|BaseLangChain",
"target": "conversationalRetrievalQAChain_0",
"targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
"type": "buttonedge",
"id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel|BaseLangChain-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
"data": {
"label": ""
}
},
{
"source": "recursiveCharacterTextSplitter_1",
"sourceHandle": "recursiveCharacterTextSplitter_1-output-recursiveCharacterTextSplitter-RecursiveCharacterTextSplitter|TextSplitter",
"target": "pdfFile_0",
"targetHandle": "pdfFile_0-input-textSplitter-TextSplitter",
"type": "buttonedge",
"id": "recursiveCharacterTextSplitter_1-recursiveCharacterTextSplitter_1-output-recursiveCharacterTextSplitter-RecursiveCharacterTextSplitter|TextSplitter-pdfFile_0-pdfFile_0-input-textSplitter-TextSplitter",
"data": {
"label": ""
}
},
{
"source": "pdfFile_0",
"sourceHandle": "pdfFile_0-output-pdfFile-Document",
"target": "weaviateUpsert_0",
"targetHandle": "weaviateUpsert_0-input-document-Document",
"type": "buttonedge",
"id": "pdfFile_0-pdfFile_0-output-pdfFile-Document-weaviateUpsert_0-weaviateUpsert_0-input-document-Document",
"data": {
"label": ""
}
},
{
"source": "openAIEmbeddings_1",
"sourceHandle": "openAIEmbeddings_1-output-openAIEmbeddings-OpenAIEmbeddings|Embeddings",
"target": "weaviateUpsert_0",
"targetHandle": "weaviateUpsert_0-input-embeddings-Embeddings",
"type": "buttonedge",
"id": "openAIEmbeddings_1-openAIEmbeddings_1-output-openAIEmbeddings-OpenAIEmbeddings|Embeddings-weaviateUpsert_0-weaviateUpsert_0-input-embeddings-Embeddings",
"data": {
"label": ""
}
},
{
"source": "weaviateUpsert_0",
"sourceHandle": "weaviateUpsert_0-output-retriever-Weaviate|VectorStoreRetriever|BaseRetriever",
"target": "conversationalRetrievalQAChain_0",
"targetHandle": "conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever",
"type": "buttonedge",
"id": "weaviateUpsert_0-weaviateUpsert_0-output-retriever-Weaviate|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever",
"data": {
"label": ""
}
}
]
}