pydantic.v1.error_wrappers.ValidationError: 2 validation errors for DataRow
code:
from langchain_community.document_loaders import DirectoryLoader
from ragas import run_config
from ragas.executor import Executor
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
import nest_asyncio
nest_asyncio.apply()
# Load every Markdown file found (recursively) under ./data.
loader = DirectoryLoader("./data", glob="**/*.md")
documents = loader.load()

# Mirror each document's 'source' metadata under a 'filename' key.
# NOTE(review): presumably ragas reads 'filename' to identify documents -- confirm.
for document in documents:
    document.metadata['filename'] = document.metadata['source']

# generator with openai models
# Both models talk to an OpenAI-compatible endpoint serving "Qwen2";
# '<|im_end|>' is passed as an explicit stop sequence.
generator_llm = ChatOpenAI(
    model="Qwen2",
    temperature=0.3,
    openai_api_key="xxxx",
    openai_api_base='http://xxxx.xxxx.xxxx.xxxx:xxxx',
    stop=['<|im_end|>'],
)
# The critic is configured identically to the generator.
critic_llm = ChatOpenAI(
    model="Qwen2",
    temperature=0.3,
    openai_api_key="xxxx",
    openai_api_base='http://xxxx.xxxx.xxxx.xxxx:xxxx',
    stop=['<|im_end|>'],
)

# Embedding model loaded through HuggingFaceEmbeddings: runs on CPU,
# encodes in batches of 32 and normalizes the resulting vectors.
embedding_model_name = "embedding\\bge-large-zh-v1.5"
embedding_model_kwargs = {'device': 'cpu'}
embedding_encode_kwargs = {'batch_size': 32, 'normalize_embeddings': True}
embed_model = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=embedding_model_kwargs,
    encode_kwargs=embedding_encode_kwargs,
)

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embed_model,
)

# generate testset
# Ask for 10 samples: 50% simple, 25% reasoning, 25% multi-context.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)
# The resulting DataFrame is not stored; assign it if it is needed later.
testset.to_pandas()
error:
Generating: 70%|███████ | 7/10 [09:45<04:10, 83.62s/it]
Traceback (most recent call last):
File "D:\RA_LLM\pythonProject\generate_test_data.py", line 50, in <module>
testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
File "D:\anaconda\envs\bge\lib\site-packages\ragas\testset\generator.py", line 210, in generate_with_langchain_docs
return self.generate(
File "D:\anaconda\envs\bge\lib\site-packages\ragas\_analytics.py", line 129, in wrapper
result = func(*args, **kwargs)
File "D:\anaconda\envs\bge\lib\site-packages\ragas\testset\generator.py", line 309, in generate
raise e
File "D:\anaconda\envs\bge\lib\site-packages\ragas\testset\generator.py", line 304, in generate
test_data_rows = exec.results()
File "D:\anaconda\envs\bge\lib\site-packages\ragas\executor.py", line 116, in results
results = asyncio.run(_aresults())
File "D:\anaconda\envs\bge\lib\site-packages\nest_asyncio.py", line 30, in run
return loop.run_until_complete(task)
File "D:\anaconda\envs\bge\lib\site-packages\nest_asyncio.py", line 98, in run_until_complete
return f.result()
File "D:\anaconda\envs\bge\lib\asyncio\futures.py", line 201, in result
raise self._exception
File "D:\anaconda\envs\bge\lib\asyncio\tasks.py", line 256, in __step
result = coro.send(None)
File "D:\anaconda\envs\bge\lib\site-packages\ragas\executor.py", line 111, in _aresults
r = await future
File "D:\anaconda\envs\bge\lib\asyncio\tasks.py", line 611, in _wait_for_one
return f.result() # May raise f.exception().
File "D:\anaconda\envs\bge\lib\asyncio\futures.py", line 201, in result
raise self._exception
File "D:\anaconda\envs\bge\lib\asyncio\tasks.py", line 256, in __step
result = coro.send(None)
File "D:\anaconda\envs\bge\lib\site-packages\ragas\executor.py", line 34, in sema_coro
return await coro
File "D:\anaconda\envs\bge\lib\site-packages\ragas\executor.py", line 60, in wrapped_callable_async
raise e
File "D:\anaconda\envs\bge\lib\site-packages\ragas\executor.py", line 54, in wrapped_callable_async
result = await callable(*args, **kwargs)
File "D:\anaconda\envs\bge\lib\site-packages\ragas\testset\evolutions.py", line 145, in evolve
return await self.generate_datarow(
File "D:\anaconda\envs\bge\lib\site-packages\ragas\testset\evolutions.py", line 243, in generate_datarow
return DataRow(
File "D:\anaconda\envs\bge\lib\site-packages\pydantic\v1\main.py", line 341, in __init__
raise validation_error
pydantic.v1.error_wrappers.ValidationError: 2 validation errors for DataRow
ground_truth
str type expected (type=type_error.str)
ground_truth
value is not a valid float (type=type_error.float)
Please help me!
The error you're encountering indicates that the ground_truth field in the DataRow model is not receiving the expected data type. The ground_truth field should be a string [1].
Here are some steps to help resolve the issue:
- Check Data Type: Ensure that the `ground_truth` value you are passing to the `DataRow` is a string. If it's currently a list or another type, convert it to a string.
- Debugging: Add print statements or use a debugger to inspect the data being passed to the `DataRow` to ensure it matches the expected format.
- Review Code: Look at the part of your code where `DataRow` instances are created, especially in the `generate_datarow` function, and ensure the `ground_truth` is correctly formatted as a string.
If you follow these steps and ensure the data types are correct, the validation error should be resolved.
To continue talking to Dosu, mention @dosu.
Which version are you using?
ragas: 0.1.21 python: 3.9.19 pydantic : 2.9.2 pydantic_core: 2.23.4 pydantic-settings: 2.5.2
Which version are you using?