I couldn't upload a file and use it in a thread normally via the openai library...
Confirm this is an issue with the Python library and not an underlying OpenAI API
- [X] This is an issue with the Python library
Describe the bug
I uploaded a file to a vector store, but when I tried to use it in a thread, the assistant replied:
It seems there was an error while trying to search the uploaded files. Could you please try uploading the file again, or let me know if there is a specific file you want me to look into?
To Reproduce
- create a vector store
- create an assistant
- upload a file
- wait for the upload to finish
- attach the file to the vector store
- create a thread
- create a run
- wait for the run to complete
- get the messages
- take the run's message
Code snippets
import json
import os
import time

from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Create vector store and assistant
vector_store = client.beta.vector_stores.create(name="Test")
assistant = client.beta.assistants.create(
    description="Test",
    model="gpt-4o",
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    temperature=0.4,
)

# Upload file and wait for it to be processed
file = client.files.create(file=("data.json", json.dumps({"name": "Alexbabaliks"}).encode()), purpose="assistants")
while True:
    file_status = client.files.retrieve(file_id=file.id)
    if file_status.status == 'processed':
        break
    time.sleep(1)

# Attach the file to the vector store and wait for the store to report completion
client.beta.vector_stores.files.create(vector_store_id=vector_store.id, file_id=file.id)
while True:
    vector_store = client.beta.vector_stores.retrieve(vector_store_id=vector_store.id)
    if vector_store.status == 'completed':
        break
    time.sleep(1)

# Create thread and run
thread = client.beta.threads.create(tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}})
run = client.beta.threads.runs.create(
    instructions="What is my name??? Take it from JSON file and return JSON in format {'name': '<name>'}",
    thread_id=thread.id,
    assistant_id=assistant.id,
    model="gpt-4o",
    temperature=0.4,
    tools=[{"type": "file_search"}],
)

# Wait for the run to complete
while True:
    run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run_status.status == "completed":
        break
    elif run_status.status == "failed":
        break
    time.sleep(2)

# Take the run's message
answer = ""
messages = client.beta.threads.messages.list(thread_id=thread.id)
for message in messages.data:
    if run.id != message.run_id:
        continue
    for content in message.content:
        if content.type == "text":
            answer = content.text.value
            break
print(answer)
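A note on diagnosing this: I can't tell from the assistant's reply whether the file ever finished indexing. The sketch below reuses the client, vector_store and file objects from the snippet above and checks the per-file ingestion status rather than only the vector store status, which should surface an indexing failure if there is one:

# Diagnostic sketch only, reusing `client`, `vector_store` and `file` from above:
# inspect the per-file ingestion status instead of only the vector store status.
vs_file = client.beta.vector_stores.files.retrieve(
    vector_store_id=vector_store.id,
    file_id=file.id,
)
print(vs_file.status)          # 'in_progress', 'completed', 'failed' or 'cancelled'
if vs_file.status == "failed":
    print(vs_file.last_error)  # e.g. a parsing or unsupported-file error

vs = client.beta.vector_stores.retrieve(vector_store_id=vector_store.id)
print(vs.file_counts)          # per-store counts: completed / in_progress / failed

# The SDK also has a polling helper that waits for a single file to be ingested:
# client.beta.vector_stores.files.create_and_poll(vector_store_id=vector_store.id, file_id=file.id)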
OS
Linux
Python version
3.11.1
Library version
openai v1.51.0
I came up with a workaround, but it doesn't work well for large JSON files (around 5 MB), and sometimes, even for a smaller, simple file, it returns: {"n": "I couldn't find your given name in the provided documents."} :-D
import json
import os
import time

from openai import OpenAI

TEMPERATURE = 0.2


def ask(client: OpenAI, assistant_id: str, thread_id: str, file_id: str, instruction: str) -> str:
    # Attach the file to the message instead of to a shared vector store
    client.beta.threads.messages.create(
        thread_id=thread_id,
        content=instruction,
        role="user",
        attachments=[{"file_id": file_id, "tools": [{"type": "file_search"}]}],
    )
    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        assistant_id=assistant_id,
        temperature=TEMPERATURE,
    )

    # Poll the run, giving up after ~2 minutes
    count = 0
    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
        if run.status == "completed":
            break
        elif run.status == "failed":
            raise Exception(f"{run.last_error.code}: {run.last_error.message}")
        count += 1
        if count > 30:
            raise Exception("Too many requests")
        else:
            time.sleep(4)

    # Return the text of the message produced by this run
    answer = ""
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in messages.data:
        if run.id != message.run_id:
            continue
        for content in message.content:
            if content.type == "text":
                answer = content.text.value
                break
    return answer


def main() -> None:
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    # Create assistant
    assistant = client.beta.assistants.create(
        description="Test",
        instructions="Return all answer in JSON format",
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        temperature=TEMPERATURE,
    )

    # Upload file
    data = ("data.json", json.dumps({"given_name": "John", "family_name": "Smit"}).encode())
    file = client.files.create(file=data, purpose="assistants")

    thread = client.beta.threads.create()  # Create thread

    instruction = "What is my given_name? Answer format {'n': '<name>'}"
    answer = ask(client, assistant.id, thread.id, file.id, instruction)
    print(answer)  # Sometimes the answer is {"n": "I couldn't find your given name in the provided documents."}

    instruction = "What is my last family_name? Answer format {'l': '<last name>'}"
    answer = ask(client, assistant.id, thread.id, file.id, instruction)
    print(answer)


if __name__ == '__main__':
    main()
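For reference, the hand-written polling loops above could also be replaced with the SDK's built-in *_and_poll helpers. This is only a sketch of the same flow (it assumes the same client, assistant, thread, file and instruction set up in main()/ask() above), not a fix for the retrieval problem:

# Sketch: same flow as ask(), but letting the SDK poll the run for us.
client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=instruction,
    attachments=[{"file_id": file.id, "tools": [{"type": "file_search"}]}],
)
run = client.beta.threads.runs.create_and_poll(  # blocks until a terminal status
    thread_id=thread.id,
    assistant_id=assistant.id,
    temperature=TEMPERATURE,
)
if run.status != "completed":
    raise Exception(f"Run ended with status {run.status}: {run.last_error}")

# Messages are listed newest first, so the first one is this run's answer.
messages = client.beta.threads.messages.list(thread_id=thread.id)
answer = messages.data[0].content[0].text.value
print(answer)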
Really sorry for the delayed response.
This sounds like an issue with the underlying OpenAI API and not the SDK, so I'm going to go ahead and close this issue.
Would you mind reposting at community.openai.com if you're still running into this?