chainlit icon indicating copy to clipboard operation
chainlit copied to clipboard

Processing Multiple Documents to Chat With

Open Hiten-03 opened this issue 2 months ago • 0 comments

Describe the bug I am working on a project where a user can chat with multiple documents which are uploaded by the user. For single document it works fine. But when I upload 2-3 documents, it only takes last document and give answers only related to the last document

Below is my code

@cl.on_chat_start
async def on_chat_start():
    
    files = await cl.AskFileMessage(
        content="Please upload a text file to begin!",
        accept=["text/plain","text/csv", "application/pdf"],
        max_size_mb=10,
        max_files=3,
        timeout=180,
    ).send()


    for file in files:
        msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
        await msg.send()


        if file.name.split('.')[1] == "pdf":
            elements = [
                cl.Pdf(name=file.name, display="inline", path=file.path)
            ]
            # Reminder: The name of the pdf must be in the content of the message
            await cl.Message(content="Here is your uploaded PDF!", elements=elements).send()
            print('its a file path',file.path)
            loader = PyPDFLoader(file.path)
            data = loader.load()
            # print('data - type',data, type(data),dir(data))
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=100,
                separators=["\n\n", "\n", " ", ""]
            )
            docs = text_splitter.split_documents(data)

        client = chromadb.EphemeralClient()
        client_settings = Settings(
            chroma_db_impl="duckdb+parquet",
            anonymized_telemetry=False,
            allow_reset=True,
        )

        collection_name = ''.join(random.choices(string.ascii_letters + string.digits, k=16))

        docsearch = await cl.make_async(Chroma.from_documents)(
            client=client,
            documents=docs,
            embedding=embeddings,
            collection_name = collection_name,
            client_settings=client_settings
        )

        message_history = ChatMessageHistory()

        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=message_history,
            return_messages=True,
        )

        PROMPT = PromptTemplate(template=prompt_template,
                                input_variables=["context", "question"])

     
        chain = ConversationalRetrievalChain.from_llm(
            llm= llm,
            chain_type="stuff",
            retriever=docsearch.as_retriever(),
            memory=memory,
            return_source_documents=True,
            combine_docs_chain_kwargs={"prompt": PROMPT},
        )

        await cl.Message(content="Hello, I am Smart Document Assistant. I am here to help you with any question you may have about the uploaded document."+"\n", disable_feedback=False).send()


        cl.user_session.set("chain", chain)

Expected behavior It should process multiple documents and should answer questions based on all documents uploaded

Hiten-03 avatar Apr 17 '24 09:04 Hiten-03