
pdf-qa not working:

Status: Open · miriam-z opened this issue 1 year ago · 0 comments

Describe the bug

Upload a PDF, in this case an Apple SEC 10-K filing.

(Screenshot from 2024-04-28 10:23:06 showing the error below.)

pinecone.core.client.exceptions.PineconeApiException: (400) Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:20:51 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1560', 'x-pinecone-request-id': '7850253967941358202', 'x-envoy-upstream-service-time': '384', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}
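For context: OpenAIEmbeddings defaults to text-embedding-ada-002, which returns 1536-dimensional vectors, while the "quickstart" index was evidently created with dimension 8 (presumably left over from Pinecone's quickstart example). A minimal sketch of checking the index and recreating it with a matching dimension, assuming the v3 pinecone-client API and a serverless index (the cloud/region values are placeholders):

import os

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Per the error above, this currently prints 8.
print(pc.describe_index("quickstart").dimension)

# Recreate the index with the embedding model's dimension (1536 for
# text-embedding-ada-002, the OpenAIEmbeddings default).
# WARNING: delete_index removes all vectors stored in the index.
pc.delete_index("quickstart")
pc.create_index(
    name="quickstart",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),  # placeholder cloud/region
)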

2024-04-28 10:20:50 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:20:50 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]

To Reproduce

Steps to reproduce the behavior:

pdf-qa.py

import os
from typing import List
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.docstore.document import Document

import pinecone

import chainlit as cl
from chainlit.types import AskFileResponse

pinecone_client = pinecone.Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

index_name = "quickstart"
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
embeddings = OpenAIEmbeddings()
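# Note: OpenAIEmbeddings defaults to text-embedding-ada-002, which produces
# 1536-dimensional vectors, so the "quickstart" index must be created with
# dimension 1536 (see the Pinecone error above).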

namespaces = set()

welcome_message = """Welcome to the Chainlit PDF QA demo! To get started:
1. Upload a PDF or text file
2. Ask a question about the file
"""


def process_file(file: AskFileResponse):
    if file.type == "text/plain":
        Loader = TextLoader
    elif file.type == "application/pdf":
        Loader = PyPDFLoader

    loader = Loader(file.path)
    documents = loader.load()
    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
    return docs


def get_docsearch(file: AskFileResponse):
    docs = process_file(file)

    # Save data in the user session
    cl.user_session.set("docs", docs)

    # Create a unique namespace for the file
    namespace = file.id

    if namespace in namespaces:
        docsearch = Pinecone.from_existing_index(
            index_name=index_name, embedding=embeddings, namespace=namespace
        )
    else:
        docsearch = Pinecone.from_documents(
            docs, embeddings, index_name=index_name, namespace=namespace
        )
        namespaces.add(namespace)

    return docsearch


@cl.on_chat_start
async def start():
    await cl.Avatar(
        name="Chatbot",
        url="https://avatars.githubusercontent.com/u/128686189?s=400&u=a1d1553023f8ea0921fba0debbe92a8c5f840dd9&v=4",
    ).send()
    files = None
    while files is None:
        files = await cl.AskFileMessage(
            content=welcome_message,
            accept=["text/plain", "application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
    await msg.send()

    # No async implementation in the Pinecone client, fallback to sync
    docsearch = await cl.make_async(get_docsearch)(file)

    message_history = ChatMessageHistory()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]

    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # Create the text element referenced in the message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]

        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()

Expected behavior

The answer should be returned with source citations.


Desktop (please complete the following information):

  • OS: macOS
  • Browser: Chrome
  • Version: 123.0.6312.122
  • Chainlit: 1.0.401

python --version
Python 3.10.7

Additional context

Full log:

2024-04-28 10:30:52 - Your app is available at http://localhost:8000
2024-04-28 10:31:03 - 2 changes detected
2024-04-28 10:31:04 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:05 - HTTP Request: POST https://cloud.getliteral.ai/api/upload/file "HTTP/1.1 200 OK"
2024-04-28 10:31:06 - HTTP Request: POST https://storage.googleapis.com/literal-bucket/ "HTTP/1.1 204 No Content"
2024-04-28 10:31:07 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:07 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - Error while flushing create_element: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:07 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - Error while flushing create_step: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:08 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:08 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:10 - Task exception was never retrieved
future: <Task finished name='Task-181' coro=<ChainlitDataLayer.create_step() done, defined at /Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/__init__.py:31> exception=Exception([{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}])>
Traceback (most recent call last):
  File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/tasks.py", line 232, in __step
    result = coro.send(None)
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/__init__.py", line 46, in wrapper
    return await method(self, *args, **kwargs)
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/__init__.py", line 326, in create_step
    await self.client.api.send_steps([step])
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 1147, in send_steps
    return await self.make_api_call("send steps", query, variables)
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 251, in make_api_call
    raise_error(json["errors"])
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 235, in raise_error
    raise Exception(error)
Exception: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:22 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-28 10:31:27 - (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:31:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1498', 'x-pinecone-request-id': '6494146641853823921', 'x-envoy-upstream-service-time': '339', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}
Traceback (most recent call last):
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/utils.py", line 39, in wrapper
    return await user_function(**params_values)
  File "/Users/mincheung/Documents/chainlit-cookbook/pdf-qa/app.py", line 96, in start
    docsearch = await cl.make_async(get_docsearch)(file)
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/asyncer/_main.py", line 358, in wrapper
    return await anyio.to_thread.run_sync(
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/futures.py", line 285, in __await__
    yield self  # This tells Task to wait for completion.
  File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/tasks.py", line 304, in __wakeup
    future.result()
  File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/futures.py", line 201, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "/Users/mincheung/Documents/chainlit-cookbook/pdf-qa/app.py", line 67, in get_docsearch
    docsearch = Pinecone.from_documents(
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_core/vectorstores.py", line 508, in from_documents
    return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 434, in from_texts
    pinecone.add_texts(
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 157, in add_texts
    [res.get() for res in async_res]
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 157, in <listcomp>
    [res.get() for res in async_res]
  File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/multiprocessing/pool.py", line 774, in get
    raise self._value
  File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/api_client.py", line 203, in __call_api
    raise e
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/api_client.py", line 196, in __call_api
    response_data = self.request(
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/api_client.py", line 455, in request
    return self.rest_client.POST(url,
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/rest.py", line 302, in POST
    return self.request("POST", url,
  File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/rest.py", line 261, in request
    raise PineconeApiException(http_resp=r)
pinecone.core.client.exceptions.PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:31:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1498', 'x-pinecone-request-id': '6494146641853823921', 'x-envoy-upstream-service-time': '339', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}

2024-04-28 10:31:28 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:28 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]

EDIT:

After running

pip install -U pinecone-client langchain

the app now fails with a different error:

type object 'Pinecone' has no attribute 'from_documents'
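For what it's worth, this error usually means the Pinecone symbol being called is the pinecone-client class (which has no from_documents method) rather than the LangChain vector store; the community Pinecone wrapper has since moved to the separate langchain-pinecone package. A rough sketch of the replacement calls, assuming langchain-pinecone and langchain-openai are installed, PINECONE_API_KEY is set, and docs/namespace come from the script above:

from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

embeddings = OpenAIEmbeddings()  # 1536-dimensional by default

# Replaces Pinecone.from_documents(...) from langchain.vectorstores.pinecone
docsearch = PineconeVectorStore.from_documents(
    docs, embeddings, index_name="quickstart", namespace=namespace
)

# Replaces Pinecone.from_existing_index(...)
docsearch = PineconeVectorStore.from_existing_index(
    index_name="quickstart", embedding=embeddings, namespace=namespace
)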

miriam-z · Apr 28 '24 02:04