Error when running the documentation example for DAG eval
Describe the bug It seems the task node is not outputting a string as expected
ValidationError: 1 validation error for TaskNodeOutput output Input should be a valid string [type=string_type, input_value=['Intro', 'Body', 'Conclusion'], input_type=list] For further information visit https://errors.pydantic.dev/2.10/v/string_type
To Reproduce
On the doc https://docs.confident-ai.com/docs/metrics-dag
I reproduced the example with the only difference that I use an Azure-hosted OpenAI model (gpt4o)
Screenshots
Is it due to JSON confinement because of my azure openAI use? https://docs.confident-ai.com/guides/guides-using-custom-llms
hey @Bennoo is there more above what you showed in the trace error message that you can show? It is probably JSON confinement because I just ran everything myself and it worked, but I want to see how I can improve the error message for cases like this
Hey @Bennoo since you are using AzureOpenAI's GPT-4o and the error itself looks like JSON confinement. It is recommended to add an additional argument of response_format = {"type": "json_object"}.
from langchain_openai import AzureChatOpenAI
from deepeval.models.base_model import DeepEvalBaseLLM
class AzureOpenAI(DeepEvalBaseLLM):
    """deepeval adapter around a LangChain ``AzureChatOpenAI`` chat model.

    Both generation paths force JSON mode via
    ``response_format={"type": "json_object"}`` so that deepeval's JSON
    confinement (parsing the raw completion as JSON) is more likely to
    succeed on Azure-hosted models.
    """

    def __init__(self, model):
        # The wrapped LangChain chat model instance.
        self.model = model

    def load_model(self):
        """Return the underlying LangChain chat model."""
        return self.model

    def generate(self, prompt: str) -> str:
        """Synchronously produce a JSON-mode completion for ``prompt``."""
        llm = self.load_model()
        reply = llm.invoke(prompt, response_format = {"type": "json_object"})
        return reply.content

    async def a_generate(self, prompt: str) -> str:
        """Asynchronously produce a JSON-mode completion for ``prompt``."""
        llm = self.load_model()
        reply = await llm.ainvoke(prompt, response_format = {"type": "json_object"})
        return reply.content

    def get_model_name(self):
        """Human-readable identifier used by deepeval in its logs."""
        return "Custom Azure OpenAI Model"
# Replace these with real values
custom_model = AzureChatOpenAI(
    openai_api_version=openai_api_version,
    azure_deployment=azure_deployment,
    azure_endpoint=azure_endpoint,
    openai_api_key=openai_api_key,
)

# Wrap the raw LangChain model in the deepeval-compatible adapter.
azure_openai = AzureOpenAI(model=custom_model)

# Smoke test: JSON mode means the reply should be a JSON document.
print(azure_openai.generate("Write me a joke"))
Hello @penguine-ip, since the latest update (2.6.4), the error only occurs when I use gpt4o-mini and no longer with gpt4o (with the previous version it happened with both). So when I am using gpt4o-mini, here is the full error stack trace:
---------------------------------------------------------------------------
ValidationError Traceback (most recent call last)
Cell In[35], line 4
1 from deepeval.metrics import DAGMetric
3 format_correctness = DAGMetric(name="Format Correctness", dag=dag, model=azure_openai)
----> 4 format_correctness.measure(test_case)
5 print(format_correctness.score)
File /usr/local/lib/python3.11/site-packages/deepeval/metrics/dag/dag.py:65, in DAGMetric.measure(self, test_case, _show_indicator)
63 if self.async_mode:
64 loop = get_or_create_event_loop()
---> 65 loop.run_until_complete(
66 self.a_measure(test_case, _show_indicator=False)
67 )
68 else:
69 self.dag._execute(metric=self, test_case=test_case)
File /usr/local/lib/python3.11/site-packages/nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
95 if not f.done():
96 raise RuntimeError(
97 'Event loop stopped before Future completed.')
---> 98 return f.result()
File /usr/local/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result
File /usr/local/lib/python3.11/asyncio/tasks.py:279, in Task.__step(***failed resolving arguments***)
277 result = coro.send(None)
278 else:
--> 279 result = coro.throw(exc)
280 except StopIteration as exc:
281 if self._must_cancel:
282 # Task is cancelled right before coro stops.
File /usr/local/lib/python3.11/site-packages/deepeval/metrics/dag/dag.py:93, in DAGMetric.a_measure(self, test_case, _show_indicator)
89 self.evaluation_cost = 0 if self.using_native_model else None
90 with metric_progress_indicator(
91 self, async_mode=True, _show_indicator=_show_indicator
92 ):
---> 93 await self.dag._a_execute(metric=self, test_case=test_case)
94 self.success = self.is_successful()
95 self.verbose_logs = construct_verbose_logs(
96 self,
97 steps=[
(...)
100 ],
101 )
File /usr/local/lib/python3.11/site-packages/deepeval/metrics/dag/graph.py:38, in DeepAcyclicGraph._a_execute(self, metric, test_case)
33 async def _a_execute(
34 self,
35 metric: BaseMetric,
36 test_case: LLMTestCase,
37 ) -> None:
---> 38 await asyncio.gather(
39 *(
40 root_node._a_execute(
41 metric=metric, test_case=test_case, depth=0
42 )
43 for root_node in self.root_nodes
44 )
45 )
File /usr/local/lib/python3.11/asyncio/tasks.py:349, in Task.__wakeup(self, future)
347 def __wakeup(self, future):
348 try:
--> 349 future.result()
350 except BaseException as exc:
351 # This may also be a cancellation.
352 self.__step(exc)
File /usr/local/lib/python3.11/asyncio/tasks.py:277, in Task.__step(***failed resolving arguments***)
273 try:
274 if exc is None:
275 # We use the `send` method directly, because coroutines
276 # don't have `__iter__` and `__next__` methods.
--> 277 result = coro.send(None)
278 else:
279 result = coro.throw(exc)
File /usr/local/lib/python3.11/site-packages/deepeval/metrics/dag/nodes.py:337, in TaskNode._a_execute(self, metric, test_case, depth)
335 res = await metric.model.a_generate(prompt)
336 data = trimAndLoadJson(res, self)
--> 337 self._output = TaskNodeOutput(**data).output
339 metric._verbose_steps.append(
340 construct_node_verbose_log(self, self._depth)
341 )
342 await asyncio.gather(
343 *(
344 child._a_execute(
(...)
348 )
349 )
File /usr/local/lib/python3.11/site-packages/pydantic/main.py:214, in BaseModel.__init__(self, **data)
212 # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
213 __tracebackhide__ = True
--> 214 validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
215 if self is not validated_self:
216 warnings.warn(
217 'A custom validator is returning a value other than `self`.\n'
218 "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
219 'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
220 stacklevel=2,
221 )
ValidationError: 1 validation error for TaskNodeOutput
output
Input should be a valid string [type=string_type, input_value=['Intro', 'Body', 'Conclusion'], input_type=list]
For further information visit https://errors.pydantic.dev/2.10/v/string_type
Hello @spike-spiegel-21 !
I tried to fix the JSON confinement by using the LangChain with_structured_output() function.
Here is my customLLM implementation:
from langchain_openai import AzureChatOpenAI
from deepeval.models.base_model import DeepEvalBaseLLM
from pydantic import BaseModel
import os
class AzureOpenAI(DeepEvalBaseLLM):
    """deepeval adapter that delegates JSON confinement to LangChain.

    Instead of raw JSON mode, ``with_structured_output`` binds the pydantic
    ``schema`` through OpenAI function calling, so both generation paths
    return an already-validated ``BaseModel`` instance rather than a string.
    """

    def __init__(self, model):
        # The wrapped LangChain chat model instance.
        self.model = model

    def load_model(self):
        """Return the underlying LangChain chat model."""
        return self.model

    def generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        """Synchronously generate a response parsed into ``schema``."""
        structured = self.load_model().with_structured_output(
            schema, method="function_calling"
        )
        return structured.invoke(prompt)

    async def a_generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        """Asynchronously generate a response parsed into ``schema``."""
        structured = self.load_model().with_structured_output(
            schema, method="function_calling"
        )
        result = await structured.ainvoke(prompt)
        return result

    def get_model_name(self):
        """Human-readable identifier used by deepeval in its logs."""
        return "Custom Azure OpenAI Model"
# Replace these with real values (credentials are read from the environment).
custom_model = AzureChatOpenAI(
    openai_api_version=os.getenv("OPENAI_API_VERSION"),
    azure_deployment="gpt4o",
    azure_endpoint=os.getenv("AZURE_ENDPOINT"),
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# Wrap the raw LangChain model in the deepeval-compatible adapter.
azure_openai = AzureOpenAI(model=custom_model)
What do you think? Do you think it is better to use the response_format parameter?
My version seems to work with GPT4o AND GPT4o-mini
Thanks,