Testset Generation: RateLimitError
Describe the bug
Rate limit reached for gpt-3.5-turbo-0125 on tokens per min (TPM): Limit 80000, Used 79767, Requested 736. Please try again in 377ms. Visit https://platform.openai.com/account/rate-limits to learn more.
Ragas version: 0.1.2.dev5+g2235ee9
Python version: 3.10.12
Code to Reproduce
from ragas.testset import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

distributions = {
    simple: 0.6,
    multi_context: 0.2,
    reasoning: 0.2,
}

testsetgenerator = TestsetGenerator.with_openai(
    generator_llm="gpt-3.5-turbo-0125",
    critic_llm="gpt-3.5-turbo-0125",
    embeddings="text-embedding-3-small",
    chunk_size=512,
)
testsetgenerator.adapt(language="russian", evolutions=distributions)

# docs: LangChain documents loaded elsewhere (not shown in the report)
testset = testsetgenerator.generate_with_langchain_docs(docs, 50, distributions, raise_exceptions=True)
testset_pd = testset.to_pandas()
testset_pd.sample(10)
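Not part of the original report, but worth noting: the generate_with_langchain_docs signature shown in the traceback below accepts a run_config, so concurrency can be throttled at generation time. A minimal sketch, assuming the RunConfig fields from ragas 0.1.x (max_workers, max_retries, max_wait):

from ragas.run_config import RunConfig

# Sketch: lower concurrency so bursts of parallel requests stay under the TPM cap.
throttled = RunConfig(max_workers=2, max_retries=10, max_wait=120)
testset = testsetgenerator.generate_with_langchain_docs(
    docs, 50, distributions, raise_exceptions=True, run_config=throttled
)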
Error trace
Exception in thread Thread-10:
Traceback (most recent call last):
File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
self.run()
File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 75, in run
results = self.loop.run_until_complete(self._aresults())
File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
return future.result()
File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 63, in _aresults
raise e
File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 58, in _aresults
r = await future
File "/usr/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one
return f.result() # May raise f.exception().
File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 91, in wrapped_callable_async
return counter, await callable(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/ragas/testset/extractor.py", line 49, in extract
results = await self.llm.generate(prompt=prompt, is_async=is_async)
File "/usr/local/lib/python3.10/dist-packages/ragas/llms/base.py", line 92, in generate
return await agenerate_text_with_retry(
File "/usr/local/lib/python3.10/dist-packages/tenacity/_asyncio.py", line 88, in async_wrapped
return await fn(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/tenacity/_asyncio.py", line 47, in __call__
do = self.iter(retry_state=retry_state)
File "/usr/local/lib/python3.10/dist-packages/tenacity/__init__.py", line 325, in iter
raise retry_exc.reraise()
File "/usr/local/lib/python3.10/dist-packages/tenacity/__init__.py", line 158, in reraise
raise self.last_attempt.result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/dist-packages/tenacity/_asyncio.py", line 50, in __call__
result = await fn(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/ragas/llms/base.py", line 169, in agenerate_text
return await self.langchain_llm.agenerate_prompt(
File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 554, in agenerate_prompt
return await self.agenerate(
File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 514, in agenerate
raise exceptions[0]
File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 617, in _agenerate_with_cache
return await self._agenerate(
File "/usr/local/lib/python3.10/dist-packages/langchain_openai/chat_models/base.py", line 533, in _agenerate
response = await self.async_client.create(messages=message_dicts, **params)
File "/usr/local/lib/python3.10/dist-packages/openai/resources/chat/completions.py", line 1330, in create
return await self._post(
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1725, in post
return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1428, in request
return await self._request(
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1504, in _request
return await self._retry_request(
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1550, in _retry_request
return await self._request(
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1504, in _request
return await self._retry_request(
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1550, in _retry_request
return await self._request(
File "/usr/local/lib/python3.10/dist-packages/openai/_base_client.py", line 1519, in _request
raise self._make_status_error_from_response(err.response) from None
openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-4FhBaCDesRNA1pK7Qr04gXs2 on tokens per min (TPM): Limit 80000, Used 79767, Requested 736. Please try again in 377ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
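The trace shows ragas wrapping the LLM call in a tenacity retry (agenerate_text_with_retry) that re-raises the underlying openai.RateLimitError once its attempts are exhausted. For reference, a generic client-side backoff for 429s looks like the sketch below; this is an illustration using the openai v1 client directly, not ragas's internal wrapper:

import openai
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential

client = openai.OpenAI()

@retry(
    retry=retry_if_exception_type(openai.RateLimitError),  # retry only on 429s
    wait=wait_random_exponential(min=1, max=60),           # jittered exponential backoff
    stop=stop_after_attempt(10),
    reraise=True,                                          # surface the last error, as in the trace above
)
def chat_with_backoff(**kwargs):
    return client.chat.completions.create(**kwargs)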
---------------------------------------------------------------------------
ExceptionInRunner Traceback (most recent call last)
<ipython-input-21-f9a1848dc49d> in <cell line: 25>()
23
24 # in the updated version, testset already includes ground_truth
---> 25 testset = testsetgenerator.generate_with_langchain_docs(docs, 50, distributions, raise_exceptions=True)
26
27 testset_pd = testset.to_pandas()
/usr/local/lib/python3.10/dist-packages/ragas/testset/generator.py in generate_with_langchain_docs(self, documents, test_size, distributions, with_debugging_logs, is_async, raise_exceptions, run_config)
148 ):
149 # chunk documents and add to docstore
--> 150 self.docstore.add_documents(
151 [Document.from_langchain_document(doc) for doc in documents]
152 )
/usr/local/lib/python3.10/dist-packages/ragas/testset/docstore.py in add_documents(self, docs, show_progress)
208 ]
209
--> 210 self.add_nodes(nodes, show_progress=show_progress)
211
212 def add_nodes(
/usr/local/lib/python3.10/dist-packages/ragas/testset/docstore.py in add_nodes(self, nodes, show_progress, desc)
248 results = executor.results()
249 if results == []:
--> 250 raise ExceptionInRunner()
251
252 for i, n in enumerate(nodes):
ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exception=False` incase you want to show only a warning message instead.
Expected behavior
A generated testset.
Additional context
Hey @Sansan4ez I think you should just pass raise_exceptions=False to make it retry.
@shahules786 it doesn't work even with your suggestion: openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-xxxx on tokens per min (TPM): Limit 60000, Used 59857, Requested 770. Please try again in 627ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
File "/home/dgxadmin/rag_evaluation/create_test_data.py", line 70, in raise_exception=False incase you want to show only a warning message instead.
@srikanthmalla Hello, I also occasionally hit this bug. Are you able to reproduce it consistently? I couldn't.
Also running into similar errors. I tried using raise_exception=False and adding a RunConfig with very high limits, as well as running the whole thing with is_async=False.
commenting just to bump this issue~
I have the same issue with ragas version 0.1.3 (Python version 3.10.13).
Using the metrics "faithfulness", "answer_relevancy", "context_relevancy", "context_recall", "context_precision", and "answer_correctness" with OpenAI and the default model. I have 69 questions, which with these six metrics results in 414 requests (69 × 6).
I was in tier 1 on OpenAI's API, but then upgraded to tier 2. Despite the increased rate limits, I still run into openai.RateLimitError as described in this issue. The error is pretty clear:
raise self._make_status_error_from_response(err.response) from None
openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-16k in organization org-xyz on tokens per min (TPM): Limit 80000, Used 79781, Requested 4629. Please try again in 3.307s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
It seems to me that the library reads the rate limit (80k TPM in my case on the new tier) and then tries to issue as many requests as will fit, but a bug leads it to max out and exceed this limit.
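If that theory holds, the failure mode is simply bursting right up to the TPM cap. A back-of-envelope way to pick a safer request rate (a hypothetical helper for illustration, not part of ragas):

def safe_requests_per_minute(tpm_limit: int, avg_tokens_per_request: int, headroom: float = 0.8) -> int:
    """Hypothetical helper: requests per minute that fit under a TPM cap, with headroom."""
    return max(1, int(tpm_limit * headroom) // avg_tokens_per_request)

# e.g. an 80,000 TPM limit and ~750-token requests at 80% headroom:
print(safe_requests_per_minute(80_000, 750))  # -> 85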
I tried the same as @fongelias and finally found a configuration of parameters that does not lead to exceeding the rate limit. So this workaround works for me, but it results in very slow execution due to the loss of parallelism:
from ragas import evaluate
from ragas.run_config import RunConfig
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
)

evaluate(
    dataset,
    raise_exceptions=True,
    run_config=RunConfig(
        timeout=60,
        max_retries=10,
        max_wait=180,   # default: 60
        max_workers=1,  # default: 16 <--- I think this is the setting that ensures that there are no rate limit exceptions!
    ),
    is_async=False,
    metrics=[
        # RAGAs score (https://arxiv.org/abs/2309.15217):
        # Generation
        faithfulness,       # factual consistency measure, requires: question, contexts, answer
        answer_relevancy,   # how complete and concise the answer is, requires: question, answer
        # Retrieval
        context_relevancy,  # how well the contexts fit the question, requires: question, contexts
        # Additional evaluation, Ragas library (https://docs.ragas.io/en/stable/concepts/metrics/index.html)
        # Retrieval
        context_recall,     # how aligned the context is with the ground truth, requires: contexts, ground truth
        context_precision,  # how well-ordered relevant context is (most relevant on top?), requires: question, contexts
        # End-to-End
        answer_correctness, # how correct an answer is, requires: answer, ground truth
    ],
)
)