chainlit
chainlit copied to clipboard
pdf-qa not working:
Describe the bug
Upload a PDF (in this case, Apple's SEC Form 10-K). Processing fails with the following error:
(400) Reason: Bad Request pinecone.core.client.exceptions.PineconeApiException: (400) Reason: Bad Request HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:20:51 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1560', 'x-pinecone-request-id': '7850253967941358202', 'x-envoy-upstream-service-time': '384', 'server': 'envoy'}) HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}
2024-04-28 10:20:50 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK" 2024-04-28 10:20:50 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
To Reproduce Steps to reproduce the behavior:
pdf-qa.py
import os
from typing import List
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.docstore.document import Document
import pinecone
import chainlit as cl
from chainlit.types import AskFileResponse
# --- Module-level configuration shared by every chat session ---------------

# Pinecone client; credentials are read from the environment.
pinecone_client = pinecone.Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV"),
)

# Name of the Pinecone index used for all uploads.
# NOTE: Pinecone rejects upserts whose vector size differs from the index
# dimension (reported failure: "Vector dimension 1536 does not match the
# dimension of the index 8"), so this index must be created with the same
# dimension as the vectors produced by `embeddings`.
index_name = "quickstart"

# Splits loaded documents into chunks of up to 1000 characters, overlapping by 100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Embedding model used both to index documents and to query them.
embeddings = OpenAIEmbeddings()

# Namespaces (one per uploaded file id) already indexed by this process.
namespaces = set()

# Prompt shown to the user when a chat starts.
welcome_message = (
    "Welcome to the Chainlit PDF QA demo! To get started:\n"
    "1. Upload a PDF or text file\n"
    "2. Ask a question about the file\n"
)
def process_file(file: AskFileResponse):
    """Load an uploaded file and split it into chunks.

    Args:
        file: The uploaded file. Its ``type`` (MIME type) selects the loader
            and its ``path`` is handed to that loader.

    Returns:
        The chunks produced by the module-level ``text_splitter``. Each chunk's
        ``metadata["source"]`` is set to ``"source_<i>"``, where ``i`` is the
        chunk's position in the returned list.

    Raises:
        ValueError: If ``file.type`` is neither ``text/plain`` nor
            ``application/pdf``.
    """
    if file.type == "text/plain":
        loader_cls = TextLoader
    elif file.type == "application/pdf":
        loader_cls = PyPDFLoader
    else:
        # Fail with a clear message instead of an UnboundLocalError further
        # down when no loader matches the MIME type.
        raise ValueError(f"Unsupported file type: {file.type!r}")

    documents = loader_cls(file.path).load()
    docs = text_splitter.split_documents(documents)

    # Tag every chunk with a positional source identifier.
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
    return docs
def get_docsearch(file: AskFileResponse):
    """Return a Pinecone vector store for the uploaded file.

    The file is chunked with ``process_file`` and the chunks are saved in the
    user session under ``"docs"``. The file id is used as the Pinecone
    namespace: if that namespace was already indexed by this process, the
    existing index is reused; otherwise the chunks are embedded and uploaded,
    and the namespace is recorded in ``namespaces``.

    Args:
        file: The uploaded file to index.

    Returns:
        The vector store bound to ``index_name`` and the file's namespace.
    """
    chunks = process_file(file)

    # Keep the chunks available to the rest of the session.
    cl.user_session.set("docs", chunks)

    # One namespace per uploaded file.
    ns = file.id

    if ns not in namespaces:
        # First time this file is seen: embed and upload its chunks.
        store = Pinecone.from_documents(
            chunks, embeddings, index_name=index_name, namespace=ns
        )
        namespaces.add(ns)
        return store

    # Already indexed: just attach to the existing namespace.
    return Pinecone.from_existing_index(
        index_name=index_name, embedding=embeddings, namespace=ns
    )
@cl.on_chat_start
async def start():
    """Set up a chat session: collect a file, index it, and build the QA chain.

    Sends the chatbot avatar, asks for a text or PDF upload, indexes the first
    uploaded file through ``get_docsearch``, builds a
    ``ConversationalRetrievalChain`` with buffer memory, and stores the chain
    in the user session under ``"chain"``.
    """
    avatar = cl.Avatar(
        name="Chatbot",
        url="https://avatars.githubusercontent.com/u/128686189?s=400&u=a1d1553023f8ea0921fba0debbe92a8c5f840dd9&v=4",
    )
    await avatar.send()

    # Keep prompting while no upload comes back (send() returned None;
    # presumably on timeout -- confirm against the Chainlit docs).
    uploads = None
    while uploads is None:
        ask = cl.AskFileMessage(
            content=welcome_message,
            accept=["text/plain", "application/pdf"],
            max_size_mb=20,
            timeout=180,
        )
        uploads = await ask.send()
    uploaded = uploads[0]

    status = cl.Message(
        content=f"Processing `{uploaded.name}`...", disable_feedback=True
    )
    await status.send()

    # No async implementation in the Pinecone client, fallback to sync
    build_docsearch = cl.make_async(get_docsearch)
    docsearch = await build_docsearch(uploaded)

    # Conversation memory; "answer" is the chain output that gets stored.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=ChatMessageHistory(),
        return_messages=True,
    )

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    status.content = f"`{uploaded.name}` processed. You can now ask questions!"
    await status.update()

    cl.user_session.set("chain", chain)
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's QA chain and cite its sources.

    Runs the chain stored in the user session under ``"chain"`` on the message
    content, attaches one ``cl.Text`` element per returned source document
    (named ``source_<index>`` in retrieval order), and appends either the list
    of source names or "No sources found" to the answer before sending it.

    Args:
        message: The incoming user message; its ``content`` is the question.
    """
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    # One text element per source document, referenced by name in the message.
    # `or []` keeps the original tolerance for a missing/empty source list.
    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents or [])
    ]

    # Decide the footer outside any "has sources" guard so that the
    # "No sources found" fallback is actually reachable.
    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
Expected behavior: answers are returned with source citations.
Screenshots If applicable, add screenshots to help explain your problem.
Desktop (please complete the following information):
- OS: [e.g. iOS]
- Browser: chrome
- Version : 123.0.6312.122
- Chainlit: 1.0.401
python --version
Python 3.10.7
Additional context
Full log:
2024-04-28 10:30:52 - Your app is available at http://localhost:8000
2024-04-28 10:31:03 - 2 changes detected
2024-04-28 10:31:04 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:05 - HTTP Request: POST https://cloud.getliteral.ai/api/upload/file "HTTP/1.1 200 OK"
2024-04-28 10:31:06 - HTTP Request: POST https://storage.googleapis.com/literal-bucket/ "HTTP/1.1 204 No Content"
2024-04-28 10:31:07 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:07 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - Error while flushing create_element: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:07 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - Error while flushing create_step: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:08 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:08 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:10 - Task exception was never retrieved
future: <Task finished name='Task-181' coro=<ChainlitDataLayer.create_step() done, defined at /Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/init.py:31> exception=Exception([{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}])>
Traceback (most recent call last):
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/tasks.py", line 232, in __step
result = coro.send(None)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/init.py", line 46, in wrapper
return await method(self, *args, **kwargs)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/init.py", line 326, in create_step
await self.client.api.send_steps([step])
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 1147, in send_steps
return await self.make_api_call("send steps", query, variables)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 251, in make_api_call
raise_error(json["errors"])
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 235, in raise_error
raise Exception(error)
Exception: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:22 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-28 10:31:27 - (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:31:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1498', 'x-pinecone-request-id': '6494146641853823921', 'x-envoy-upstream-service-time': '339', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}
Traceback (most recent call last):
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/utils.py", line 39, in wrapper
return await user_function(**params_values)
File "/Users/mincheung/Documents/chainlit-cookbook/pdf-qa/app.py", line 96, in start
docsearch = await cl.make_async(get_docsearch)(file)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/asyncer/_main.py", line 358, in wrapper
return await anyio.to_thread.run_sync(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/futures.py", line 285, in await
yield self # This tells Task to wait for completion.
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/tasks.py", line 304, in __wakeup
future.result()
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/futures.py", line 201, in result
raise self._exception.with_traceback(self._exception_tb)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/Users/mincheung/Documents/chainlit-cookbook/pdf-qa/app.py", line 67, in get_docsearch
docsearch = Pinecone.from_documents(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_core/vectorstores.py", line 508, in from_documents
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 434, in from_texts
pinecone.add_texts(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 157, in add_texts
[res.get() for res in async_res]
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 157, in
2024-04-28 10:31:28 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK" 2024-04-28 10:31:28 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}] Add any other context about the problem here.
EDIT:
After running
pip install -U pinecone-client langchain
the error changes to: type object 'Pinecone' has no attribute 'from_documents'