Exception when generating synthetic testset
[ X ] I have checked the documentation and related resources and couldn't resolve my bug.
**Describe the bug**
I'm basically following the documentation on how to create a synthetic dataset from langchain documents. The LLM is a gpt-3.5-turbo hosted on Azure OpenAI.
Multithreading makes it hard to debug into the library at this point with VSCode.
Ragas version: 0.1.3
Python version: 3.11
Code to Reproduce
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from rag_system.config import AzureOpenAILLMConfig
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset.generator import TestsetGenerator
from ragas.testset.docstore import InMemoryDocumentStore
from langchain.text_splitter import TokenTextSplitter
from ragas.testset.extractor import KeyphraseExtractor
# Build the ragas test set generator on top of Azure OpenAI models.
# NOTE(review): `os` and `openai_config` are not imported/defined in the
# visible snippet (only the AzureOpenAILLMConfig class is imported) --
# presumably they are set up elsewhere; confirm before running this as-is.

# Chat model wrapped for ragas; used below as both generator and critic LLM.
generator_llm = LangchainLLMWrapper(AzureChatOpenAI(
openai_api_version=openai_config.api_version,
azure_deployment=openai_config.deployment_name,
azure_endpoint=openai_config.endpoint,
openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
))
# Embedding model wrapped for ragas; the deployment name is read from the
# environment, while API version and endpoint are shared with the chat model.
embeddings_model = LangchainEmbeddingsWrapper(AzureOpenAIEmbeddings(
openai_api_version=openai_config.api_version,
azure_deployment=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
azure_endpoint=openai_config.endpoint,
openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
))
# The keyphrase extractor reuses the generator LLM.
keyphrase_extractor = KeyphraseExtractor(llm=generator_llm)
# Token-based splitter: chunks of 2500 with an overlap of 100.
splitter = TokenTextSplitter(chunk_size=2500, chunk_overlap=100)
# In-memory document store wired with the splitter, extractor and embeddings.
doc_store = InMemoryDocumentStore(
splitter=splitter, extractor=keyphrase_extractor, embeddings=embeddings_model
)
# The same LLM instance is passed as generator and as critic.
generator = TestsetGenerator(
generator_llm=generator_llm,
critic_llm=generator_llm,
embeddings=embeddings_model,
docstore=doc_store,
)
from ragas.testset.evolutions import simple, reasoning, conditional
# adapt to language
# The same evolutions list and cache directory are passed to both adapt()
# and save().
language = "german"
cache_dir = '.ragas'
generator.adapt(language, evolutions=[simple, reasoning, conditional], cache_dir=cache_dir)
generator.save(evolutions=[simple, reasoning, conditional], cache_dir=cache_dir)
# docs is a list of langchain documents
# NOTE(review): `docs` is not defined in the visible snippet -- it must be
# loaded before this call.
# Requests 25 samples; the distribution weights sum to 1.0
# (0.5 simple, 0.25 reasoning, 0.25 conditional).
testset = generator.generate_with_langchain_docs(
docs, test_size=25, distributions={
simple:0.5,
reasoning:0.25,
conditional:0.25
}
)
Error trace
Exception in thread Thread-18:
Traceback (most recent call last):
File "/home/vscode/.pyenv/versions/3.11.8/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
self.run()
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/executor.py", line 93, in run
results = self.loop.run_until_complete(self._aresults())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/vscode/.pyenv/versions/3.11.8/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/executor.py", line 81, in _aresults
raise e
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/executor.py", line 76, in _aresults
r = await future
^^^^^^^^^^^^
File "/home/vscode/.pyenv/versions/3.11.8/lib/python3.11/asyncio/tasks.py", line 615, in _wait_for_one
return f.result() # May raise f.exception().
^^^^^^^^^^
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/executor.py", line 36, in sema_coro
return await coro
^^^^^^^^^^
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/executor.py", line 109, in wrapped_callable_async
return counter, await callable(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/testset/evolutions.py", line 147, in evolve
return await self.generate_datarow(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/testset/evolutions.py", line 209, in generate_datarow
selected_nodes = [
^
File "/workspaces/Shareflex%20Copilot/.venv/lib/python3.11/site-packages/ragas/testset/evolutions.py", line 212, in <listcomp>
if i - 1 < len(current_nodes.nodes)
~~^~~
TypeError: unsupported operand type(s) for -: 'str' and 'int'
Additional context Add any other context about the problem here.
Hi @almajo, are you able to generate the testset using the above method?
Unfortunately not... However, I can skip over it with the `raise_exception=False` flag and can generate at least some data.