openai-python icon indicating copy to clipboard operation
openai-python copied to clipboard

I couldn't upload a file and use it in a thread normally via the openai library...

Open alex-deus opened this issue 1 year ago • 1 comments

Confirm this is an issue with the Python library and not an underlying OpenAI API

  • [X] This is an issue with the Python library

Describe the bug

I uploaded a file to a vector store, but I couldn't use the file in threads. The assistant answers: "It seems there was an error while trying to search the uploaded files. Could you please try uploading the file again, or let me know if there is a specific file you want me to look into?"

To Reproduce

  1. create a vector store
  2. create an assistant
  3. upload a file
  4. wait for the upload to finish processing
  5. attach the file to the vector store
  6. create a thread
  7. create a run
  8. wait for the run to complete
  9. get the messages
  10. take the run's message

Code snippets

import json
import os
import time

from openai import OpenAI

# Reproduction script: upload a JSON file, index it in a vector store, then ask
# an assistant with the file_search tool to read a value back from that file.

# Upper bound (seconds) for each polling loop below, so a job that never
# reaches a terminal state cannot hang the script forever.
POLL_TIMEOUT_SECONDS = 120

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

vector_store = client.beta.vector_stores.create(name="Test")

assistant = client.beta.assistants.create(
    description="Test",
    model="gpt-4o",
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    temperature=0.4
)

# Upload file
file = client.files.create(file=("data.json", json.dumps({"name": "Alexbabaliks"}).encode()), purpose="assistants")
deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    file_status = client.files.retrieve(file_id=file.id)
    if file_status.status == 'processed':
        break
    # 'error' is terminal: waiting longer would never reach 'processed'.
    if file_status.status == 'error':
        raise RuntimeError(f"File {file.id} failed processing: {file_status.status_details}")
    if time.monotonic() > deadline:
        raise TimeoutError(f"File {file.id} was not processed within {POLL_TIMEOUT_SECONDS}s")
    time.sleep(1)

# Attach the file to the vector store and wait for indexing to finish.
client.beta.vector_stores.files.create(vector_store_id=vector_store.id, file_id=file.id)
deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    vector_store = client.beta.vector_stores.retrieve(vector_store_id=vector_store.id)
    # Surface indexing failures explicitly instead of continuing with a store
    # that file_search cannot actually use.
    if vector_store.status == 'expired' or vector_store.file_counts.failed:
        raise RuntimeError(
            f"Vector store {vector_store.id} is unusable: status={vector_store.status}, "
            f"file_counts={vector_store.file_counts}"
        )
    if vector_store.status == 'completed':
        break
    if time.monotonic() > deadline:
        raise TimeoutError(f"Vector store {vector_store.id} was not ready within {POLL_TIMEOUT_SECONDS}s")
    time.sleep(1)

thread = client.beta.threads.create(tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}})

run = client.beta.threads.runs.create(
    instructions="What is my name??? Take it from JSON file and return JSON in format {'name': '<name>'}",
    thread_id=thread.id,
    assistant_id=assistant.id,
    model="gpt-4o",
    temperature=0.4,
    tools=[{"type": "file_search"}],
)

deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run_status.status == "completed":
        break
    # Every other terminal status means no answer is coming; report it rather
    # than printing an empty string or polling forever.
    if run_status.status in ("failed", "cancelled", "expired", "incomplete"):
        raise RuntimeError(
            f"Run {run.id} ended with status {run_status.status!r}: {run_status.last_error}"
        )
    if time.monotonic() > deadline:
        raise TimeoutError(f"Run {run.id} did not finish within {POLL_TIMEOUT_SECONDS}s")

    time.sleep(2)

# Collect the text produced by this run; messages from other runs are skipped.
answer = ""
messages = client.beta.threads.messages.list(thread_id=thread.id)
for message in messages.data:
    if run.id != message.run_id:
        continue

    for content in message.content:
        if content.type == "text":
            answer = content.text.value
            break

print(answer)

OS

Linux

Python version

3.11.1

Library version

openai v1.51.0

alex-deus avatar Oct 04 '24 15:10 alex-deus

I came up with a workaround, but it doesn't work well for large JSON files (around 5 MB), and sometimes, even for a small, simple file, it returns: {"n": "I couldn't find your given name in the provided documents."} :-D

import json
import os
import time

from openai import OpenAI

# Sampling temperature passed both when the assistant is created in main()
# and when each run is created in ask().
TEMPERATURE = 0.2


def ask(
    client: OpenAI,
    assistant_id: str,
    thread_id: str,
    file_id: str,
    instruction: str,
    *,
    poll_interval: float = 4,
    max_polls: int = 30,
) -> str:
    """Post *instruction* to a thread with a file attached and return the reply text.

    A user message carrying ``file_id`` as a ``file_search`` attachment is added
    to the thread, a run is started for the assistant, and the run is polled
    until it reaches a terminal status.

    Args:
        client: OpenAI client used for every API call.
        assistant_id: ID of the assistant that executes the run.
        thread_id: ID of the thread the message is posted to.
        file_id: ID of an already uploaded file to attach to the message.
        instruction: Text of the user message.
        poll_interval: Seconds to sleep between status checks.
        max_polls: Number of sleeps allowed before giving up.

    Returns:
        Text of a message produced by this run, or ``""`` when the run produced
        no text content.

    Raises:
        RuntimeError: The run ended as failed, cancelled, expired or incomplete.
        TimeoutError: The run did not finish within the polling budget.
    """
    client.beta.threads.messages.create(
        thread_id=thread_id,
        content=instruction,
        role="user",
        attachments=[{"file_id": file_id, "tools": [{"type": "file_search"}]}],
    )

    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        assistant_id=assistant_id,
        temperature=TEMPERATURE
    )

    polls = 0
    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
        if run.status == "completed":
            break
        if run.status == "failed":
            # last_error is optional on the run object; guard before reading it.
            if run.last_error is not None:
                raise RuntimeError(f"{run.last_error.code}: {run.last_error.message}")
            raise RuntimeError(f"Run {run.id} failed without error details")
        # These statuses are terminal too: polling further can never reach
        # "completed", so fail immediately with the real reason.
        if run.status in ("cancelled", "expired", "incomplete"):
            raise RuntimeError(f"Run {run.id} ended with status '{run.status}'")

        polls += 1
        if polls > max_polls:
            raise TimeoutError(
                f"Run {run.id} still '{run.status}' after {max_polls} polls"
            )
        time.sleep(poll_interval)

    # Keep only text produced by this run; messages from other runs are skipped.
    answer = ""
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in messages.data:
        if run.id != message.run_id:
            continue

        for content in message.content:
            if content.type == "text":
                answer = content.text.value
                break

    return answer


def main() -> None:
    """Create an assistant, upload a small JSON file and ask two questions about it.

    Both questions go to the same thread with the same uploaded file attached,
    and each answer is printed as soon as it is received.
    """
    api_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Assistant with file_search enabled.
    helper = api_client.beta.assistants.create(
        description="Test",
        instructions="Return all answer in JSON format",
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        temperature=TEMPERATURE,
    )

    # In-memory JSON document uploaded under the name "data.json".
    payload = json.dumps({"given_name": "John", "family_name": "Smit"}).encode()
    uploaded = api_client.files.create(file=("data.json", payload), purpose="assistants")

    conversation = api_client.beta.threads.create()

    # The first answer is sometimes
    # {"n": "I couldn't find your given name in the provided documents."}
    questions = (
        "What is my given_name? Answer format {'n': '<name>'}",
        "What is my last family_name? Answer format {'l': '<last name>'}",
    )
    for question in questions:
        print(ask(api_client, helper.id, conversation.id, uploaded.id, question))


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

alex-deus avatar Oct 05 '24 07:10 alex-deus

Really sorry for the delayed response.

This sounds like an issue with the underlying OpenAI API and not the SDK, so I'm going to go ahead and close this issue.

Would you mind reposting at community.openai.com if you're still running into this?

RobertCraigie avatar Jan 22 '25 20:01 RobertCraigie