Pydantic doesn't play nicely with OAI multimodal content + tool calls because of `typing.Iterable` lazy validation
It seems that Pydantic has issues validating and parsing OAI chat messages whose content fields are typed as iterables, e.g.:
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
from pydantic import BaseModel
class Sample(BaseModel):
messages: list[ChatCompletionMessageParam]
obj = Sample(
messages=[
{"role": "user", "content": [{"type": "text", "text": "Hello, World!"}]},
]
)
print(obj.model_dump())
This outputs:
{'messages': [{'content': SerializationIterator(index=0, iterator=ValidatorIterator(index=0, schema=Some(Union(UnionValidator { mode: Smart, choices: [(DefinitionRef(DefinitionRefValidator { definition: "ChatCompletionContentPartTextParam" }), None), (TypedDict(TypedDictValidator { fields: [TypedDictField { name: "image_url", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "image_url", py_key: Py(0x7ff76f43cdb0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff76faa0eb0), required: true, validator: TypedDict(TypedDictValidator { fields: [TypedDictField { name: "url", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "url", py_key: Py(0x7ff76f41bbd0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff771300570), required: true, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "detail", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "detail", py_key: Py(0x7ff76f41bc30) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff7706bb630), required: false, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"low": 1, "high": 2, "auto": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f452a00)), values: [Py(0x7ff771386910), Py(0x7ff7713a5380), Py(0x7ff7713a53b0)] }, expected_repr: "'auto', 'low' or 'high'", name: "literal['auto','low','high']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("ImageURL") }) }, TypedDictField { name: "type", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "type", 
py_key: Py(0x7ff76f41bc60) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1726950), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"image_url": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f462880)), values: [Py(0x7ff76faa0eb0)] }, expected_repr: "'image_url'", name: "literal['image_url']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("ChatCompletionContentPartImageParam") }), None), (TypedDict(TypedDictValidator { fields: [TypedDictField { name: "input_audio", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "input_audio", py_key: Py(0x7ff76f43ccf0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff76f90b7b0), required: true, validator: TypedDict(TypedDictValidator { fields: [TypedDictField { name: "data", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "data", py_key: Py(0x7ff76f61ea00) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722088), required: true, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "format", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "format", py_key: Py(0x7ff76f540660) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722f00), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"wav": 0, "mp3": 1}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f462840)), values: [Py(0x7ff76f8ee8b0), Py(0x7ff76f8eea00)] }, expected_repr: "'wav' 
or 'mp3'", name: "literal['wav','mp3']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("InputAudio") }) }, TypedDictField { name: "type", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "type", py_key: Py(0x7ff76f50e430) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1726950), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"input_audio": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f462600)), values: [Py(0x7ff76f90b7b0)] }, expected_repr: "'input_audio'", name: "literal['input_audio']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("ChatCompletionContentPartInputAudioParam") }), None), (TypedDict(TypedDictValidator { fields: [TypedDictField { name: "file", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "file", py_key: Py(0x7ff76f50f2d0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722c58), required: true, validator: TypedDict(TypedDictValidator { fields: [TypedDictField { name: "file_data", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "file_data", py_key: Py(0x7ff76f43cd30) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x7ff76f902ff0), required: false, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "file_id", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "file_id", py_key: Py(0x7ff76f50c330) }, rest: [] }), by_alias: None, 
by_alias_then_name: None }, name_py: Py(0x7ff76fab6880), required: false, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }, TypedDictField { name: "filename", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "filename", py_key: Py(0x7ff76f43cf70) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1722cc0), required: false, validator: Str(StrValidator { strict: false, coerce_numbers_to_str: false }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("FileFile") }) }, TypedDictField { name: "type", lookup_key_collection: LookupKeyCollection { by_name: Simple(LookupPath { first_item: PathItemString { key: "type", py_key: Py(0x7ff76f4543c0) }, rest: [] }), by_alias: None, by_alias_then_name: None }, name_py: Py(0x1726950), required: true, validator: Literal(LiteralValidator { lookup: LiteralLookup { expected_bool: None, expected_int: None, expected_str: Some({"file": 0}), expected_py_dict: None, expected_py_values: None, expected_py_primitives: Some(Py(0x7ff76f4633c0)), values: [Py(0x1722c58)] }, expected_repr: "'file'", name: "literal['file']" }) }], extra_behavior: Ignore, extras_validator: None, strict: false, loc_by_alias: true, validate_by_alias: None, validate_by_name: None, cls_name: Some("File") }), None)], custom_error: None, name: "union[ChatCompletionContentPartTextParam,ChatCompletionContentPartImageParam,ChatCompletionContentPartInputAudioParam,File]" })))), 'role': 'user'}]}
As a result, the messages_to_printable function silently fails when trying to parse the generation outputs, which leads to an empty prompt in vf-eval. One example is primeintellect/hle, which uses text/image content-part prompts for its multimodal samples; these samples break because of this behavior.
After looking into this a bit further, it appears that this is how Pydantic handles fields that are typed as typing.Iterable. The type seems to be intended for infinite generators (and, as such, the iterator doesn't get consumed on model dump?).
from typing import Iterable
from pydantic import BaseModel
class Model(BaseModel):
message: Iterable[str]
print(Model(message=["a", "b", "c"]).model_dump())
# {'message': SerializationIterator(index=0, iterator=ValidatorIterator(index=0, schema=Some(Str(StrValidator { strict: false, coerce_numbers_to_str: false }))))}
The exact same code works when the field is typed as typing.Sequence or a plain list instead:
from typing import Sequence
from pydantic import BaseModel
class Model(BaseModel):
message: Sequence[str]
print(Model(message=["a", "b", "c"]).model_dump())
# {'message': ['a', 'b', 'c']}
Unfortunately, the raw OAI types use typing.Iterable.
Update: This also causes issues with tool calls, as they are also typed with typing.Iterable.