stac-pydantic
stac-pydantic copied to clipboard
ItemProperties modifies input data in place
While trying to do some benchmarking I noticed that somehow the input datetime string was being modified in place when validating the model. Which was extremely confusing to me. I did some more testing and found this: https://github.com/stac-utils/stac-pydantic/blob/main/stac_pydantic/item.py#L38
It is modifying the input data before it gets to the pydantic validators, and doing extra validation within python when pydantic can handle it better / faster.
I will submit a related PR shortly.
This is what I used for testing purposes:
import copy
from datetime import datetime
from pydantic import BaseModel
from stac_pydantic import Item, ItemProperties
class MyModel(BaseModel):
date: datetime
num: int
text: str
MY_MODEL_DICT = {
"date": "2022-11-21T19:09:07.587000Z",
"num": "12345",
"text": "Some Text",
}
def test_mymodel_unpack():
test_dict = copy.deepcopy(MY_MODEL_DICT)
_ = MyModel(**test_dict)
assert test_dict == MY_MODEL_DICT
def test_mymodel_validate():
test_dict = copy.deepcopy(MY_MODEL_DICT)
_ = MyModel.model_validate(test_dict)
assert test_dict == MY_MODEL_DICT
class NestedMyModel(BaseModel):
my_model: MyModel
NESTED_MY_MODEL_DICT = {
"my_model": {
"date": "2022-11-21T19:09:07.587000Z",
"num": "12345",
"text": "Some Text",
}
}
def test_nested_mymodel_unpack():
test_dict = copy.deepcopy(NESTED_MY_MODEL_DICT)
_ = NestedMyModel(**test_dict)
assert test_dict == NESTED_MY_MODEL_DICT
def test_nested_mymodel_validate():
test_dict = copy.deepcopy(NESTED_MY_MODEL_DICT)
_ = NestedMyModel.model_validate(test_dict)
assert test_dict == NESTED_MY_MODEL_DICT
ITEM_DICT = {
"type": "Feature",
"stac_version": "1.0.0",
"id": "my_item",
"collection": "mycollection",
"geometry": {
"coordinates": [
[
[149.19940003830305, -36.428645762574625],
[149.05104951177714, -39.259123256881225],
[152.81379005469256, -39.32599372427865],
[152.82080633518694, -36.489064495233244],
[149.19940003830305, -36.428645762574625],
]
],
"type": "Polygon",
},
"bbox": [
149.05104951177714,
-39.32599372427865,
152.82080633518694,
-36.428645762574625,
],
"properties": {
"datetime": "2022-11-21T19:09:07.587000Z",
"created": "2023-08-03T00:31:23.346946Z",
"updated": "2023-08-03T00:31:23.346946Z",
},
"stac_extensions": [],
"links": [
{
"rel": "self",
"type": "application/geo+json",
"href": "https://my.stac/v0/collections/mycollection/items/my_item",
},
{
"rel": "collection",
"type": "application/json",
"href": "https://my.stac/v0/collections/mycollection",
},
{
"rel": "parent",
"type": "application/json",
"href": "https://my.stac/v0/collections/mycollection",
},
],
"assets": {
"data": {
"href": "https://my.stac/v0/collections/mycollection/items/my_item/data.tif",
"title": "Data",
"media_type": "image/tiff; application=geotiff",
"roles": ["data"],
},
"thumbnail": {
"href": "https://my.stac/v0/collections/mycollection/items/my_item/thumbnail.png",
"media_type": "image/png",
"roles": ["thumbnail"],
},
"metadata": {
"href": "https://my.stac/v0/collections/mycollection/items/my_item/metadata.json",
"media_type": "application/json",
"roles": ["metadata"],
},
},
}
def test_item_unpack():
item_dict = copy.deepcopy(ITEM_DICT)
_ = Item(**item_dict)
assert item_dict == ITEM_DICT
def test_item_validate():
item_dict = copy.deepcopy(ITEM_DICT)
_ = Item.model_validate(item_dict)
assert item_dict == ITEM_DICT
ITEM_PROPERTIES_DICT = {
"datetime": "2022-11-21T19:09:07.587000Z",
"created": "2023-08-03T00:31:23.346946Z",
"updated": "2023-08-03T00:31:23.346946Z",
}
def test_item_properties_unpack():
item_dict = copy.deepcopy(ITEM_PROPERTIES_DICT)
_ = ItemProperties(**item_dict)
assert item_dict == ITEM_PROPERTIES_DICT
def test_item_properties_validate():
item_dict = copy.deepcopy(ITEM_PROPERTIES_DICT)
_ = ItemProperties.model_validate(item_dict)
assert item_dict == ITEM_PROPERTIES_DICT
Three of them fail with current main, and they all pass with my PR.
@eseglem I think the real issue here is that stac-pydantic will serialized the datetime using https://github.com/stac-utils/stac-pydantic/blob/c03272e7e1a9730d9d7dd18794bd660005c3ed0b/stac_pydantic/shared.py#L20
while your input data was in form of %Y-%m-%dT%H:%M:%S.%fZ
which is still a valid RFC3339 format
I can't find a straight forward path to make sure the serialization respect the input date other than doing something like
@field_serializer("datetime")
def serialize_datetime(self, v: dt, _info: Any) -> str:
if str(v)[19] == ".":
return v.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
else:
return v.strftime("%Y-%m-%dT%H:%M:%SZ")
Oo I see the value do get converted to datetime object during the validation
step (pre) and thus the value in the input dictionary is changed 😓
so I guess #131 is 👍