verifiers icon indicating copy to clipboard operation
verifiers copied to clipboard

Pydantic doesn't play nicely with OAI multimodal content + tool calls because of `typing.Iterable` lazy validation

Open mikasenghaas opened this issue 3 months ago • 2 comments

It seems like Pydantic has issues validating and parsing OAI chat messages with iterable content fields, e.g.

from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
from pydantic import BaseModel


# Minimal reproduction: a Pydantic model whose only field is a list of
# OpenAI chat message params.
class Sample(BaseModel):
    # Each entry is annotated with the OpenAI SDK's `ChatCompletionMessageParam` type.
    messages: list[ChatCompletionMessageParam]


obj = Sample(
    messages=[
        # `content` is passed as a list of content parts rather than a plain string.
        {"role": "user", "content": [{"type": "text", "text": "Hello, World!"}]},
    ]
)

# Dump the validated model; the reported output shows `content` as a
# `SerializationIterator` wrapper rather than the original list.
print(obj.model_dump())

outputs

{'messages': [{'content': SerializationIterator(index=0, iterator=ValidatorIterator(index=0, schema=Some(Union(UnionValidator { mode: Smart, choices: [(DefinitionRef(DefinitionRefValidator { definition: "ChatCompletionContentPartTextParam" }), None), (TypedDict(TypedDictValidator { fields: [TypedDictField { name: "image_url", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "image_url", py_key: Py(0x7ff76f43cdb0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff76faa0eb0), required: true, validator: TypedDict(TypedDictValidator { fields: [TypedDictField { name: "url", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "url", py_key: Py(0x7ff76f41bbd0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff771300570), required: true, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "detail", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "detail", py_key: Py(0x7ff76f41bc30) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff7706bb630), required: false, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"low": 1, "high": 2, "auto": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f452a00)), values: [Py(0x7ff771386910), Py(0x7ff7713a5380), Py(0x7ff7713a53b0)] }, expected_repr: "'auto', 'low' or 'high'", name: "literal['auto','low','high']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("ImageURL") }) }, TypedDictField { name: "type", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "type", 
py_key: Py(0x7ff76f41bc60) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1726950), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"image_url": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f462880)), values: [Py(0x7ff76faa0eb0)] }, expected_repr: "'image_url'", name: "literal['image_url']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("ChatCompletionContentPartImageParam") }), None), (TypedDict(TypedDictValidator { fields: [TypedDictField { name: "input_audio", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "input_audio", py_key: Py(0x7ff76f43ccf0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff76f90b7b0), required: true, validator: TypedDict(TypedDictValidator { fields: [TypedDictField { name: "data", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "data", py_key: Py(0x7ff76f61ea00) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722088), required: true, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "format", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "format", py_key: Py(0x7ff76f540660) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722f00), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"wav": 0, "mp3": 1}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f462840)), values: [Py(0x7ff76f8ee8b0), Py(0x7ff76f8eea00)] }, expected_repr: "'wav' 
or 'mp3'", name: "literal['wav','mp3']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("InputAudio") }) }, TypedDictField { name: "type", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "type", py_key: Py(0x7ff76f50e430) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1726950), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"input_audio": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f462600)), values: [Py(0x7ff76f90b7b0)] }, expected_repr: "'input_audio'", name: "literal['input_audio']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("ChatCompletionContentPartInputAudioParam") }), None), (TypedDict(TypedDictValidator { fields: [TypedDictField { name: "file", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "file", py_key: Py(0x7ff76f50f2d0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722c58), required: true, validator: TypedDict(TypedDictValidator { fields: [TypedDictField { name: "file_data", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "file_data", py_key: Py(0x7ff76f43cd30) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff76f902ff0), required: false, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "file_id", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "file_id", py_key: Py(0x7ff76f50c330) }, rest: [] }), by_alias: None, 
by_alias_then_name: None }, name_py: Py(0x7ff76fab6880), required: false, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "filename", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "filename", py_key: Py(0x7ff76f43cf70) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722cc0), required: false, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("FileFile") }) }, TypedDictField { name: "type", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "type", py_key: Py(0x7ff76f4543c0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1726950), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"file": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f4633c0)), values: [Py(0x1722c58)] }, expected_repr: "'file'", name: "literal['file']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("File") }), None)], custom_error: None, name: "union[ChatCompletionContentPartTextParam,ChatCompletionContentPartImageParam,ChatCompletionContentPartInputAudioParam,File]" })))), 'role': 'user'}]}

The messages_to_printable function silently fails when trying to parse the generation outputs, which leads to an empty prompt in vf-eval. One such example is primeintellect/hle, which uses text/image part prompts for multi-modal samples; these break because of this.

mikasenghaas avatar Oct 05 '25 18:10 mikasenghaas

Okay, so I looked a bit further, and it looks like this is how Pydantic handles fields that are typed as typing.Iterable. It seems to be used for infinite generators (and as such doesn't get consumed on model dump?)

from typing import Iterable

from pydantic import BaseModel


# Reproduction without the OpenAI types: a single field annotated with
# `typing.Iterable`.
class Model(BaseModel):
    message: Iterable[str]


# A plain list is passed in, yet the dump (shown in the comment below) contains
# an iterator wrapper instead of the list's items.
print(Model(message=["a", "b", "c"]).model_dump())
# {'message': SerializationIterator(index=0, iterator=ValidatorIterator(index=0, schema=Some(Str(StrValidator { strict: false, coerce_numbers_to_str: false }))))}

The exact same code, but using typing.Sequence or a simple list, works:

from typing import Sequence

from pydantic import BaseModel


# Contrast case: the same model with the field annotated as `typing.Sequence`
# instead of `typing.Iterable`.
class Model(BaseModel):
    message: Sequence[str]


# With this annotation the dump contains the plain list, as shown in the comment below.
print(Model(message=["a", "b", "c"]).model_dump())
# {'message': ['a', 'b', 'c']}

Unfortunately, the raw OAI types use typing.Iterable.

mikasenghaas avatar Oct 05 '25 19:10 mikasenghaas

Update: This also causes issues with tool calls as they are also typed with typing.Iterable

mikasenghaas avatar Nov 06 '25 14:11 mikasenghaas