pydantic-xml
pydantic-xml copied to clipboard
allow computed_fields to be retrieved in model_validator's data arg d…
PR
This PR addresses the issue discussed in https://github.com/dapper91/pydantic-xml/issues/259.
Context
In Pydantic v2, @computed_field values are accessible during model validation phases such as @model_validator(mode="before") or "wrap". While these fields are not meant to be reassigned to the model (as they are derived), there are valid use cases for accessing their raw deserialized values—particularly during early validation.
Problem
Currently, pydantic-xml discards computed fields entirely during deserialization. This prevents developers from accessing raw data that may be essential for reconstructing the full model state, even if the field is not meant to be stored directly.
Proposal
This PR proposes to retain deserialized values for computed fields, making them available in the data dictionary passed to the @model_validator hooks. The computed fields are still not assigned to the model instance, in line with Pydantic's design—but their raw XML content is preserved for downstream inference logic.
MWE
# Minimal working example: both name parts are excluded from serialization and
# only the derived "full_name" element is emitted.
class Contact(BaseXmlModel):
    first_name: str | None = Field(exclude=True, default=None)
    last_name: str | None = Field(exclude=True, default=None)

    # Derived element encoding both parts as "first#last".
    @computed_element(tag="full_name")  # type: ignore
    def full_name(self) -> str:
        return f"{self.first_name}#{self.last_name}"

    @model_validator(mode="wrap")
    @classmethod
    def validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
        """Restore ``first_name``/``last_name`` from a raw ``full_name`` value.

        Args:
            data: Raw input handed to the validator. It is typed ``Any``, so
                it is only inspected when it is actually a ``dict``.
            handler: The wrapped validation callable; it builds the model.

        Returns:
            The validated model, with both name parts overwritten when
            ``data`` carries a non-empty string under ``"full_name"``.
        """
        model = handler(data)
        # Guard the lookup: calling ``.get`` on a non-mapping input would
        # raise AttributeError instead of leaving the model untouched.
        raw_full_name = data.get("full_name", None) if isinstance(data, dict) else None
        if isinstance(raw_full_name, str) and raw_full_name:
            # Inverse of the "first#last" encoding used by ``full_name``.
            first_name, last_name = raw_full_name.split("#")
            model.first_name = first_name
            model.last_name = last_name
        return model
# Round-trip demo: serialize a contact to XML and print it ...
obj = Contact(first_name="Lionel", last_name="du Peloux")
xml_string = obj.to_xml(pretty_print=True)
print(xml_string)
print("--------------------------------")
# ... then parse that XML back into a model and print its re-serialized form
# so the two documents can be compared.
obj = Contact.from_xml(xml_string)
print(obj.to_xml(pretty_print=True, encoding="unicode"))
@lionpeloux Hi,
Computed fields are ignored during deserialization for a reason. Your fix disables this logic for primitive types, but the same behavior would be expected for other types (dict, collection, model) as well. For example, this code should work too:
# Sub-model used as the return type of a computed element.
class Name(BaseXmlModel):
    full_name: str
# Contact variant whose computed element is a nested model rather than a
# primitive string.
class Contact(BaseXmlModel):
    first_name: str | None = Field(default=None, exclude=True)
    last_name: str | None = Field(default=None, exclude=True)

    @computed_element(tag="full_name")
    def full_name(self) -> Name:
        # Same "first#last" encoding, wrapped in a Name sub-model.
        joined = f"{self.first_name}#{self.last_name}"
        return Name(full_name=joined)
But that could lead to some unexpected behavior.
If Contact tries to deserialize an XML document whose full_name element doesn't comply with the Name format, deserialization will fail, but it shouldn't, since full_name is just a computed field.
For example:
>>> from pydantic import Field
>>> from pydantic_xml import BaseXmlModel, computed_element
>>>
>>>
>>> class Name(BaseXmlModel):
... full_name: str
...
>>> class Contact(BaseXmlModel):
... first_name: str | None = Field(exclude=True, default=None)
... last_name: str | None = Field(exclude=True, default=None)
...
... @computed_element(tag="full_name")
... def full_name(self) -> Name:
... return Name(full_name=f"{self.first_name}#{self.last_name}")
...
>>>
>>> xml_string = '''
... <Contact>
... <full_name/>
... </Contact>
... '''
>>>
>>> Contact.from_xml(xml_string)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/dima/git/pydantic-xml/pydantic_xml/model.py", line 549, in from_xml
return cls.from_xml_tree(etree.fromstring(source, **kwargs), context=context)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dima/git/pydantic-xml/pydantic_xml/model.py", line 523, in from_xml_tree
ModelT, cls.__xml_serializer__.deserialize(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dima/git/pydantic-xml/pydantic_xml/serializers/factories/model.py", line 217, in deserialize
raise utils.into_validation_error(title=self._model.__name__, errors_map=field_errors)
pydantic_core._pydantic_core.ValidationError: 1 validation error for Contact
full_name.full_name
[line 3]: Field required [type=missing, input_value={}, input_type=dict]
>>>
Hi @dapper91, thank you for taking the time to respond.
I realize my initial suggestion may have been a bit naive with respect to other use cases. The way pydantic-xml maps primitive fields—by removing a level of nesting—might explain the behavior I observed:
{
"full_name": {
"full_name": ""
}
}
Maps to:
<Contact><full_name></full_name></Contact>
My main point was to highlight that, in pydantic, raw values are accessible via the data argument in a model_validator (see examples below). This is not the case in pydantic-xml, which I find surprising given its close integration with pydantic.
Is there a specific reason why this behavior cannot be replicated?
I believe this is expected behavior: while it’s clear that you can’t assign values back to computed fields, the raw input should still be passed to the user via data, allowing them to leverage that information if needed in the deserialization process.
from typing import Any, Self
from pydantic import BaseModel, Field, ModelWrapValidatorHandler, computed_field
from pydantic_xml.model import BaseXmlModel, computed_element, element
from pydantic import (
BaseModel,
ModelWrapValidatorHandler,
ValidationError,
model_validator,
)
def example_01():
    """pydantic-xml case where the computed element is a nested model.

    Each model prints the raw ``data`` its wrap validator receives. An empty
    ``Contact`` is serialized to XML, parsed back, and dumped as JSON.
    """

    class Name(BaseXmlModel):
        full_name: str

        @model_validator(mode="wrap")
        @classmethod
        def model_validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
            # Show the raw input before delegating to the default validation.
            print(f"model_validator data : {data}")
            validated = handler(data)
            return validated

    class Contact(BaseXmlModel):
        first_name: str | None = Field(default=None, exclude=True)
        last_name: str | None = Field(default=None, exclude=True)

        @computed_element(tag="full_name")
        @property
        def full_name(self) -> Name:
            # Only a contact with both parts unset yields an empty name.
            if self.first_name is not None or self.last_name is not None:
                return Name(full_name=f"{self.first_name}#{self.last_name}")
            return Name(full_name="")

        @model_validator(mode="wrap")
        @classmethod
        def model_validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
            # Show the raw input before delegating to the default validation.
            print(f"model_validator data : {data}")
            validated = handler(data)
            return validated

    round_tripped = Contact.from_xml(Contact().to_xml())
    print(round_tripped.model_dump_json(indent=2))
def example_01_bis():
    """Plain pydantic counterpart of the nested-model computed field case.

    Each model prints the raw ``data`` its wrap validator receives. An empty
    ``Contact`` is dumped to JSON, validated back from that JSON, and dumped
    again.
    """

    class Name(BaseModel):
        full_name: str

        @model_validator(mode="wrap")
        @classmethod
        def model_validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
            # Show the raw input before delegating to the default validation.
            print(f"model_validator data : {data}")
            validated = handler(data)
            return validated

    class Contact(BaseModel):
        first_name: str | None = Field(default=None, exclude=True)
        last_name: str | None = Field(default=None, exclude=True)

        @computed_field
        @property
        def full_name(self) -> Name:
            # Only a contact with both parts unset yields an empty name.
            if self.first_name is not None or self.last_name is not None:
                return Name(full_name=f"{self.first_name}#{self.last_name}")
            return Name(full_name="")

        @model_validator(mode="wrap")
        @classmethod
        def model_validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
            # Show the raw input before delegating to the default validation.
            print(f"model_validator data : {data}")
            validated = handler(data)
            return validated

    serialized = Contact().model_dump_json(indent=2)
    reloaded = Contact.model_validate_json(serialized)
    print(reloaded.model_dump_json(indent=2))
def example_02():
    """pydantic-xml case where the computed element is a primitive string.

    The model prints the raw ``data`` its wrap validator receives. An empty
    ``Contact`` is serialized to XML, parsed back, and dumped as JSON.
    """

    class Contact(BaseXmlModel):
        first_name: str | None = Field(default=None, exclude=True)
        last_name: str | None = Field(default=None, exclude=True)

        @computed_element(tag="full_name")
        @property
        def full_name(self) -> str:
            # Only a contact with both parts unset yields an empty string.
            if self.first_name is not None or self.last_name is not None:
                return f"{self.first_name}#{self.last_name}"
            return ""

        @model_validator(mode="wrap")
        @classmethod
        def model_validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
            # Show the raw input before delegating to the default validation.
            print(f"model_validator data : {data}")
            validated = handler(data)
            return validated

    round_tripped = Contact.from_xml(Contact().to_xml())
    print(round_tripped.model_dump_json(indent=2))
def example_02_bis():
    """Plain pydantic counterpart of the primitive computed field case.

    The model prints the raw ``data`` its wrap validator receives. An empty
    ``Contact`` is dumped to JSON, validated back from that JSON, and dumped
    again.
    """

    class Contact(BaseModel):
        first_name: str | None = Field(default=None, exclude=True)
        last_name: str | None = Field(default=None, exclude=True)

        @computed_field
        @property
        def full_name(self) -> str:
            # Only a contact with both parts unset yields an empty string.
            if self.first_name is not None or self.last_name is not None:
                return f"{self.first_name}#{self.last_name}"
            return ""

        @model_validator(mode="wrap")
        @classmethod
        def model_validator(cls, data: Any, handler: ModelWrapValidatorHandler[Self]) -> Self:
            # Show the raw input before delegating to the default validation.
            print(f"model_validator data : {data}")
            validated = handler(data)
            return validated

    # NOTE: construct Contact(first_name="John", last_name="Doe") here instead
    # to try the same round trip with a populated contact.
    serialized = Contact().model_dump_json(indent=2)
    reloaded = Contact.model_validate_json(serialized)
    print(reloaded.model_dump_json(indent=2))
if __name__ == "__main__":
    # Run every example in order, each under a three-line banner preceded by a
    # blank line, and finish with one trailing blank line.
    rule = "-" * 20
    for title, run_example in (
        ("EXAMPLE 01", example_01),
        ("EXAMPLE 01 BIS", example_01_bis),
        ("EXAMPLE 02", example_02),
        ("EXAMPLE 02 BIS", example_02_bis),
    ):
        print("")
        print(rule)
        print(title)
        print(rule)
        run_example()
    print("")
--------------------
EXAMPLE 01
--------------------
model_validator data : {}
model_validator data : {'full_name': ''}
model_validator data : {'full_name': ''}
model_validator data : {}
model_validator data : {'full_name': ''}
model_validator data : {'full_name': ''}
{
"full_name": {
"full_name": ""
}
}
--------------------
EXAMPLE 01 BIS
--------------------
model_validator data : {}
model_validator data : {'full_name': ''}
model_validator data : {'full_name': ''}
model_validator data : {'full_name': {'full_name': ''}}
model_validator data : {'full_name': ''}
model_validator data : {'full_name': ''}
{
"full_name": {
"full_name": ""
}
}
--------------------
EXAMPLE 02
--------------------
model_validator data : {}
model_validator data : {}
{
"full_name": ""
}
--------------------
EXAMPLE 02 BIS
--------------------
model_validator data : {}
model_validator data : {'full_name': ''}
{
"full_name": ""
}