datamodel-code-generator
datamodel-code-generator copied to clipboard
Extra keys in fields are missing within oneOf / anyOf structures
Describe the bug
`field_include_all_keys=True` should store all extra schema keys in Field attributes. However, this does not work for schemas that contain `oneOf` or `anyOf`.
To Reproduce
Example schema:
{
"id": "example",
"title": "Example",
"type": "object",
"properties": {
"type": {"type": "string", "default": ["example"]},
"prop1": {"type": "string", "custom_key": "custom_value"},
"prop2": {
"custom_key": "custom_value",
"properties": {
"subprop0": {"type": "string", "custom_key": "custom_value_0"}
},
"oneOf": [
{
"title": "Subschema1",
"type": "object",
"properties": {
"subprop1": {"type": "string", "custom_key": "custom_value_1"}
}
},
{
"title": "Subschema2",
"type": "object",
"properties": {
"subprop2": {"type": "string", "custom_key": "custom_value_2"}
}
}
]
}
}
}
Used commandline:
$ datamodel-codegen --input-file-type jsonschema --input example.json --field-include-all-keys
Actual Result:
# generated by datamodel-codegen:
# filename: example.json
# timestamp: 2025-05-18T05:34:14+00:00
from __future__ import annotations
from typing import Optional, Union
from pydantic import Field
class Subschema1(BaseModel):
subprop0: Optional[str] = None
subprop1: Optional[str] = None
class Subschema2(BaseModel):
subprop0: Optional[str] = None
subprop2: Optional[str] = None
class Example(BaseModel):
type: Optional[str] = ["example"]
prop1: Optional[str] = Field(None, json_schema_extra={"custom_key": "custom_value"})
prop2: Optional[Union[Subschema1, Subschema2]] = Field(
None, json_schema_extra={"custom_key": "custom_value"}
)
Expected behavior
Custom keys contained in Fields:
# generated by datamodel-codegen:
# filename: example.json
# timestamp: 2025-05-18T05:26:34+00:00
from __future__ import annotations
from typing import Optional, Union
from pydantic import Field
class Subschema1(BaseModel):
subprop0: Optional[str] = Field(
None, json_schema_extra={"custom_key": "custom_value_0"}
)
subprop1: Optional[str] = Field(
None, json_schema_extra={"custom_key": "custom_value_1"}
)
class Subschema2(BaseModel):
subprop0: Optional[str] = Field(
None, json_schema_extra={"custom_key": "custom_value_0"}
)
subprop2: Optional[str] = Field(
None, json_schema_extra={"custom_key": "custom_value_2"}
)
class Example(BaseModel):
type: Optional[str] = ["example"]
prop1: Optional[str] = Field(None, json_schema_extra={"custom_key": "custom_value"})
prop2: Optional[Union[Subschema1, Subschema2]] = Field(
None, json_schema_extra={"custom_key": "custom_value"}
)
Version:
- OS: Win11
- Python version: 3.11
- datamodel-code-generator version: 0.28.2, 0.30.1
Additional context
The issue occurs when the schema is serialized here: https://github.com/koxudaxi/datamodel-code-generator/blob/a6acd5214a4eb823241a1b3f5d8d8a1564a595f2/src/datamodel_code_generator/parser/jsonschema.py#L642 and then assigned to a newly constructed schema object here: https://github.com/koxudaxi/datamodel-code-generator/blob/a6acd5214a4eb823241a1b3f5d8d8a1564a595f2/src/datamodel_code_generator/parser/jsonschema.py#L652
=> The value of the extra field of the property schemas is lost (somehow it is stored under the key #-datamodel-code-generator-#-extras-#-special-# instead).
A workaround is to explicitly copy the `extras` attribute afterwards:
def parse_combined_schema(
    self,
    name: str,
    obj: JsonSchemaObject,
    path: list[str],
    target_attribute_name: str,
) -> list[DataType]:
    """Parse the sub-schemas listed under ``target_attribute_name`` of ``obj``.

    Every sub-schema that is not a ``$ref`` is deep-merged with the remaining
    keys of ``obj`` and re-parsed into a new schema object. The ``extras`` of
    each property are then copied back onto the re-parsed property schemas,
    first from ``obj.properties`` and then from the sub-schema's own
    ``properties``, so custom keys are not lost during the round trip.

    Args:
        name: Name forwarded to ``parse_list_item`` and
            ``_parse_object_common_part``.
        obj: Schema object holding the combined sub-schemas.
        path: Path of ``obj``; forwarded to the nested parse calls.
        target_attribute_name: Attribute of ``obj`` that holds the list of
            sub-schemas (the report concerns ``oneOf`` / ``anyOf``).

    Returns:
        One entry per sub-schema. For a ``$ref`` sub-schema whose parsed data
        type has a reference, the entry is the result of
        ``_parse_object_common_part`` with that reference as its only base
        class; otherwise it is the data type from ``parse_list_item``.
    """
    base_object = obj.dict(exclude={target_attribute_name}, exclude_unset=True, by_alias=True)
    combined_schemas: list[JsonSchemaObject] = []
    refs = []
    # ``properties`` may be unset (None); normalise once so lookups never fail.
    parent_properties = obj.properties or {}

    for index, target_attribute in enumerate(getattr(obj, target_attribute_name, [])):
        if target_attribute.ref:
            combined_schemas.append(target_attribute)
            refs.append(index)
            # TODO: support partial ref
            continue

        # by_alias=True exports ``extras`` under its alias key
        # ('#-datamodel-code-generator-#-extras-#-special-#'); as reported, the
        # re-parsed property schemas come back without their extras.
        # Previously the merged schema was appended as-is at this point.
        merged_schema = self.SCHEMA_OBJECT_TYPE.parse_obj(
            self._deep_merge(
                base_object,
                target_attribute.dict(exclude_unset=True, by_alias=True),
            )
        )

        # Restore the lost extras. The parent is applied before the branch so
        # the order of the two merges matches the original workaround.
        branch_properties = getattr(target_attribute, "properties", None) or {}
        for key, merged_property in (merged_schema.properties or {}).items():
            if not hasattr(merged_property, "extras"):
                # NOTE(review): property values that are not schema objects
                # (e.g. boolean schemas) cannot carry extras -- confirm.
                continue
            for source_properties in (parent_properties, branch_properties):
                source_extras = getattr(source_properties.get(key), "extras", None)
                if source_extras:
                    merged_property.extras = self._deep_merge(merged_property.extras, source_extras)
        combined_schemas.append(merged_schema)

    parsed_schemas = self.parse_list_item(
        name,
        combined_schemas,
        path,
        obj,
        singular_name=False,
    )
    common_path_keyword = f"{target_attribute_name}Common"
    return [
        self._parse_object_common_part(
            name,
            obj,
            [*get_special_path(common_path_keyword, path), str(i)],
            ignore_duplicate_model=True,
            fields=[],
            base_classes=[d.reference],
            required=[],
        )
        if i in refs and d.reference
        else d
        for i, d in enumerate(parsed_schemas)
    ]
#2399 is weakly related, but #2400 does not fix this issue