www
www copied to clipboard
Reconciling Dataclasses And Properties In Python
Reconciling Dataclasses And Properties In Python
I love Python dataclasses, but combining them with properties is not obvious. This is a problem solving report — and a practical introduction to dataclasses!
https://florimond.dev/blog/articles/2018/10/reconciling-dataclasses-and-properties-in-python/
I like your solution, but mypy isn't happy:
>mypy vehicle.py
vehicle.py:10: error: Name 'wheels' already defined on line 7
This was really helpful! I had the mypy error as well and just changed the name of the variable from wheels to wheels_ and used __post_init__() to assign it to the property.
What if I want wheels to have a default value? E.g. ... wheels: int = 4
Couldn't you just do: ...
wheels_: int = field(default=4, init=False, repr=False)
def __post_init__(self):
self.wheels = wheels_
@cp2boston: The value of the wheels property then has type property:
$ python -i wheels.py
>>> v
Vehicle(wheels=<property object at 0x7f6944a97e50>)
>>> v.wheels
<property object at 0x7f6944a97e50>
>>>
here's a sample that seems to work:
from dataclasses import dataclass, field
@dataclass
class Vehicle:
    """Vehicle whose wheel count is exposed through the ``wheels`` property.

    The value is stored in the ``wheels_`` field, which defaults to 4 and is
    excluded from both ``__init__`` and ``repr``; it can therefore only be
    changed after construction, through the property.
    """

    wheels_: int = field(default=4, init=False, repr=False)

    @property
    def wheels(self) -> int:
        """Return the current number of wheels."""
        return self.wheels_

    @wheels.setter
    def wheels(self, num_wheels: int) -> None:
        """Store ``num_wheels`` as the new number of wheels."""
        self.wheels_ = num_wheels
v = Vehicle()
print(v.wheels)
v.wheels = 6
print(v.wheels)
@cp2boston: And how does v = Vehicle(wheels=5) play out in that class?
Good point. My use case was for the value, e.g. wheels, to be set after the instance had been created. I didn't need to provide it to the constructor. As for the mypy message, the mypy folks recognize it as a false positive and have no plans to fix it, so, I would code it up as Florimond explained and ignore the mypy warning.
I got it working by putting the property creation outside the class. I think it is only semi-ugly compared to messing with an extra dataclass attribute:
from dataclasses import dataclass, field
@dataclass
class Vehicle:
    """Vehicle whose ``wheels`` field is turned into a property after the fact.

    ``wheels`` is declared as an ordinary dataclass field (default 1) so that
    the generated ``__init__`` and ``__repr__`` know about it; the accessor
    methods below keep the actual value in ``_wheels``.
    """

    wheels: int = 1

    def get_wheels(self) -> int:
        """Return the stored number of wheels."""
        return self._wheels

    def set_wheels(self, wheels: int) -> None:
        """Store ``wheels`` as the number of wheels."""
        self._wheels = wheels


# Installed only after @dataclass has processed the class, so the field keeps
# its plain default while every later read/write goes through the accessors.
Vehicle.wheels = property(Vehicle.get_wheels, Vehicle.set_wheels)
v = Vehicle()
print(v)
v = Vehicle(wheels=6)
print(v)
Ugly? Not so much. It's a rather nice solution.
I've come up with the following solution:
from dataclasses import dataclass
@dataclass
class Foo:
    bar: int = field_property(default=0)  # Same parameters as dataclasses.field

    @field_property(bar)  # Equivalent to @field_property(bar).getter
    def get_bar(self) -> int:
        # unwrap_property(self).bar is equivalent to self._bar
        # but it's type-checked and linter-friendly
        return unwrap_property(self).bar

    # Generated by field_property (deleter too, but omitted for concision)
    # @field_property(bar).setter
    # def set_bar(self, value: int):
    #     unwrap_property(self).bar = value
assert repr(Foo()) == repr(Foo(0)) == "Foo(bar=0)"
Implementation:
import dataclasses
from collections.abc import Mapping
from typing import Any, Callable, Optional, TypeVar, cast, overload
class PropertyUnwraper:
    """Proxy that maps attribute ``name`` onto ``_name`` of a wrapped object.

    Reading, writing or deleting ``proxy.name`` performs the same operation on
    ``obj._name``. The wrapped object is stored and fetched through
    ``object.__setattr__`` / ``object.__getattribute__`` so that the proxy's
    own overrides are not triggered for it.
    """

    # Name of the proxy attribute holding the wrapped object.
    _obj_attr = "obj"

    def __init__(self, obj):
        object.__setattr__(self, PropertyUnwraper._obj_attr, obj)

    def __getattribute__(self, name):
        obj = object.__getattribute__(self, PropertyUnwraper._obj_attr)
        return getattr(obj, "_" + name)

    def __setattr__(self, name, value):
        obj = object.__getattribute__(self, PropertyUnwraper._obj_attr)
        setattr(obj, "_" + name, value)

    def __delattr__(self, name):
        obj = object.__getattribute__(self, PropertyUnwraper._obj_attr)
        delattr(obj, "_" + name)
T = TypeVar("T")


def unwrap_property(self: T) -> T:
    """Return a ``PropertyUnwraper`` proxy around ``self``.

    The proxy is cast to the type of ``self`` so that static tools treat
    ``unwrap_property(obj).name`` like ``obj.name``, while at runtime the
    access is redirected to ``obj._name``.
    """
    return cast(T, PropertyUnwraper(self))
def _is_frozen(cls: type) -> bool:
return getattr(cls, dataclasses._PARAMS).frozen
Func = TypeVar("Func", bound=Callable)
class FieldProperty:
def __init__(self, field: dataclasses.Field):
self.field = field
self.fget: Optional[Callable] = None
self.fset: Optional[Callable] = None
self.fdel: Optional[Callable] = None
def __call__(self, func: Func) -> Func:
return self.getter(func)
def getter(self, func: Func) -> Func:
self.fget = func
return func
def setter(self, func: Func) -> Func:
self.fset = func
return func
def deleter(self, func: Func) -> Func:
self.fdel = func
return func
def __set_name__(self, owner, name):
field = self.field
if self.fget is None:
self.fget = lambda self: getattr(self, "_" + field.name)
if self.fset is None:
def fset(self, value):
if _is_frozen(owner):
def fset(self, value):
raise dataclasses.FrozenInstanceError(
f"cannot assign to field {field.name!r}"
)
else:
fset = lambda self, value: setattr(self, "_" + field.name, value)
setattr(owner, field.name, getattr(owner, field.name).setter(fset))
setattr(self, field.name, value)
self.fset = fset
if self.fdel is None:
def fdel(self, value):
if _is_frozen(owner):
def fdel(self, value):
raise dataclasses.FrozenInstanceError(f"cannot delete field {field.name!r}")
else:
fdel = lambda self: delattr(self, "_" + field.name)
setattr(owner, field.name, getattr(owner, field.name).deleter(fdel))
setattr(self, field.name, value)
self.fdel = fdel
class Property(property):
if field.default is not dataclasses.MISSING:
_default_factory = lambda default=field.default: default
elif field.default_factory is not dataclasses.MISSING:
_default_factory = field.default_factory
else:
def _default_factory():
raise TypeError(f"Missing parameter {field.name}")
def setter(self, fset: Callable[[Any, Any], None]) -> "Property":
def handle_property_default(self, value):
if isinstance(value, property):
if Property._default_factory is None:
raise TypeError(f"Missing parameter {field.name!r}")
else:
value = Property._default_factory()
fset(self, value)
return super().setter(handle_property_default)
self.field.default = Property(self.fget).setter(self.fset).deleter(self.fdel)
self.field.default_factory = dataclasses.MISSING
setattr(owner, name, field)
@overload
def field_property(
    *,
    default: Any = ...,
    default_factory: Optional[Callable[[], Any]] = None,
    init=True,
    repr=True,
    hash=None,
    compare=True,
    metadata: Optional[Mapping] = None,
) -> Any:
    ...


@overload
def field_property(field: Any) -> FieldProperty:
    ...


def field_property(field=None, **kwargs):
    """Create a ``FieldProperty`` or hand back an existing one.

    Called with keyword arguments only, the arguments are forwarded to
    ``dataclasses.field`` and the resulting field is wrapped in a new
    ``FieldProperty``. Called with an existing ``FieldProperty``, that object
    is returned unchanged so it can be used as a decorator.

    Raises:
        ValueError: if ``field`` is given but is not a ``FieldProperty``.
    """
    if field is None:
        return FieldProperty(dataclasses.field(**kwargs))
    if not isinstance(field, FieldProperty):
        raise ValueError(f"Invalid field property {field}")
    return field
Does that deserve a Pypi package? 🤔
Lovely @wyfo !
I think it deserves to become a PEP in the standard dataclasses-package.
I don't know if you know about @dataclass(frozen=True) for data classes. This allows you to have read-only fields for everything. So I would say your comment that dataclasses were designed to be editable data containers is incorrect. They were also designed to be immutable containers of data. It means that you can rely on them not to change. I often also do @dataclass(eq=True, frozen=True) with these sorts of data classes because they can then be safely used as dictionary keys since they won't change after they are created.
So I ran into an issue here where instantiating the class without a value for property returns the property object itself. v = Vehicle() returns setting wheels to <property object at 0x1224a8270> getting wheels. Do you know how I can get that to return with None for Vehicle().wheels rather than the property object? Setting the default didn't seem to fix the issue, however removing init=False partially fixed the issue, although the setter still prints the property object, it does appear to actually set it correctly
Just to add to the above, using init=True obviously breaks setting in the instantiation given that the value will be none for the private attribute
FYI, I tried all of these for a dataclass with a mixture of required and optional parameters, and the only one that actually works is this one.
A little ugly, but working counts for everything!
The solution in the article got weird results if you try to add a default value: suddenly you get the property object instead of the value...
One important thing that seems not to work, is that both mypy and pylint complain:
# mypy
error: Name 'name' already defined on line 123
# pylint
E0102: method already defined line 123 (function-redefined)
I am not sure how to avoid these as both are essential part of the dev-toolkit.
Oh, bad news!
To fix the pylint error, something like # noqa: E0102 at the end of the line likely works.
I use flake8 in my current projects, but it should be very similar. However, flake8 does not complain about this line.
(Here's a comparison of flake8 and pylint which claims that pylint flags some useful features.)
I have no idea how to disable that error on that line in mypy! (I have not yet found mypy to be essential. The last project I added it to, it was a time suck for developers and caught no errors, so we removed it.)
For the moment I used comments to disable both, but I really hope we can find a better solution.
Hmmm, I like your solution better than mine, which is wordy.
Looking at my notes, I hadn't thought of having two variables, name and _name. Yes, that's inelegant, but my solution is even more inelegant.
Thanks for the tip!
(By the way, I wasn't able to install your mk in either 3.8 or a clean 3.6 environment using pip3 or pipx install mk, with an error on mklib/path.py, line 723 involving an octal constant 0777, which hasn't worked since Python 2!)
mk requires py38/py39 as I did not have time to bother about the others. I may add 37/36 support later, but I needed speed for the initial prototype. Also, I did not test installing it on many platforms. File a bug and mention details, and I will try to fix it (hopefully it is not Windows-related).
No, it has the issue I ran into in my notes - it doesn't work with default arguments:
https://gist.github.com/rec/7fb22cb5733a2af2e6bb6fd589ab71b7
fortunately, this is even easier with attrs
import attr
@attr.s(auto_attribs=True)
class Vehicle:
    """attrs-based vehicle storing its wheel count in ``_wheels``."""

    _wheels: int

    @property
    def wheels(self) -> int:
        """Return the stored number of wheels."""
        return self._wheels

    @wheels.setter
    def wheels(self, wheels: int) -> None:
        """Store ``wheels`` as the number of wheels."""
        self._wheels = wheels
it understands the leading _ convention, so Vehicle(wheels=1) or Vehicle(1) Just Works. example executable code: https://replit.com/@habnabit/UpbeatEnragedNumerator
@habnabit it's not exactly the same though. The dataclass version will have the init function go through the setter to do the setting. The attrs version will not. I would say that the dataclass behavior seems preferable; you'd sort of expect foo.x = bar to run similar x related logic to Foo(x=bar).
@quicknir otoh i can't remember any time i've wanted to run the setter at init time. do you think that's a common thing?
@habnabit I kind of feel the opposite? If the setter is doing for example some kind of validation, or recording of the setting, then I'd definitely want it to happen at init time as well. But I don't use setters/getters that often to start with.
So I ran into an issue here where instantiating the class without a value for property returns the property object itself.
v = Vehicle() returns setting wheels to <property object at 0x1224a8270> getting wheels. Do you know how I can get that to return with None for Vehicle().wheels rather than the property object? Setting the default didn't seem to fix the issue, however removing init=False partially fixed the issue, although the setter still prints the property object, it does appear to actually set it correctly
Also wanted to note that I had the same issue as @iccyp when using the solution from the article and omitting the wheels argument to the constructor. I found @mortlind 's solution worked perfectly, however it had 2 main issues in my case:
- get_wheels and set_wheels are exposed as public methods. I guess you could fix that by adding an underscore in front of their method names, but it doesn't look as nice as property methods.
- Might be just me, but you can forget to add that last line, Vehicle.wheels = property(Vehicle.get_wheels, Vehicle.set_wheels), especially if you're adding another attr with a getter/setter to the class.
The solution that works for me is to modify the one from the article slightly, by setting init=False on the dataclass itself and defining your own constructor (with a default value in the argument list). Also, since you're explicitly setting the _wheels attribute via the constructor, it looks like the _wheels type annotation is not needed anymore (I commented that out below). This is not much better than @mortlind's solution but it's one that currently works for me - also this way it's a little harder to forget about calling the getter/setter via the init constructor method.
from dataclasses import dataclass
from typing import Union
@dataclass(init=False)
class Vehicle:
    """Vehicle with a hand-written ``__init__`` that goes through the setter.

    ``init=False`` stops the dataclass from generating ``__init__``; the
    explicit constructor supplies the default of 2 and assigns through the
    ``wheels`` property, so the setter's ``int`` conversion also applies to
    constructor arguments.
    """

    wheels: int
    # _wheels: int = field(init=False, repr=False)

    def __init__(self, wheels: Union[int, str] = 2):
        self.wheels = wheels

    @property
    def wheels(self) -> int:
        """Return the stored number of wheels (and trace the access)."""
        print("getting wheels")
        return self._wheels

    @wheels.setter
    def wheels(self, wheels: Union[int, str]) -> None:
        """Convert ``wheels`` with ``int`` and store it (tracing the call)."""
        print("setting wheels to", wheels)
        self._wheels = int(wheels)
# Example for testing
v = Vehicle()
print(v)
v = Vehicle(wheels=3)
print(v)
v = Vehicle('6')
print(v)
dataclasses were designed to be editable data containers. If you really need read-only fields, you shouldn't be resorting to dataclasses in the first place.
That's not true. The dataclass decorator has a frozen parameter to set the class as being immutable : @dataclass(frozen=True).
There is one thing I don't like with the solutions proposed in the article and in the comments : they are all tricky and subtle.
It's easy to get something wrong, and someone reading that code may not be aware of all the issues at hand. Not the sort of thing I want in my code.
I'd rather go for a more explicit solution: defining the __init__ manually or using a normal class.
"""dataclass with read/write property using __init__"""
from dataclasses import dataclass, field
@dataclass
class Vehicle:
    """Vehicle with an explicit ``__init__`` and a read/write property.

    The hand-written ``__init__`` stores the value directly in ``_wheels``,
    so the property setter is not called during construction.
    """

    wheels: int

    def __init__(self, wheels: int) -> None:
        self._wheels = wheels

    @property
    def wheels(self) -> int:
        """Return the stored number of wheels (and trace the access)."""
        print("getting wheels")
        return self._wheels

    @wheels.setter
    def wheels(self, wheels: int) -> None:
        """Store ``wheels`` as the number of wheels (tracing the call)."""
        print("setting wheels to", wheels)
        self._wheels = wheels
I was reading through an excellent discussion on SO about this, and was inspired to tackle yet another attempt at adding property support for dataclasses, with a solution that IDEs hopefully won't complain about.
Here's my revised approach with using a metaclass to set up property support. Using metaclasses is also helpful so we don't need to declare the property after the class definition (for example such as in @mortlind's great solution). I could have also implemented a class decorator using the same approach, but some IDEs seem to not play well with that so I decided to use a metaclass instead, as it seems to be a bit more innocuous.
In both examples below, wheels defaults to 4 if it's not explicitly passed in via the constructor.
@dataclass
class Vehicle(metaclass=dataclass_property_support):
    # NOTE(review): `dataclass_property_support` is not defined in this
    # snippet; presumably it gives the `= property` default and the `@wheels`
    # decorator below their meaning -- confirm against its implementation.
    wheels: Union[int, str] = property
    _wheels: int = field(default=4, init=False, repr=False)

    @wheels
    def wheels(self) -> int:
        """Return the stored number of wheels (and trace the access)."""
        print("getting wheels")
        return self._wheels

    @wheels.setter
    def wheels(self, wheels: Union[int, str]):
        """Convert ``wheels`` with ``int`` and store it (tracing the call)."""
        print("setting wheels to", wheels)
        self._wheels = int(wheels)
This is a similar approach which I somewhat prefer more. It implicitly sets the wheels instance attribute to point to the property _wheels which begins with an underscore.
@dataclass
class Vehicle(metaclass=dataclass_property_support):
    wheels: Union[int, str] = 4

    # NOTE(review): as written, this property reads and writes its own name
    # (`self._wheels`); it presumably only works because
    # `dataclass_property_support` (not shown here) re-binds it to `wheels`
    # -- confirm against the metaclass implementation.
    @property
    def _wheels(self) -> int:
        """Return the stored number of wheels (and trace the access)."""
        print("getting wheels")
        return self._wheels

    @_wheels.setter
    def _wheels(self, wheels: Union[int, str]):
        """Convert ``wheels`` with ``int`` and store it (tracing the call)."""
        print("setting wheels to", wheels)
        self._wheels = int(wheels)
Then you can call it like below, and the IDE seems to be fine with it.
v = Vehicle()
print(v)
v = Vehicle(wheels=3)
print(v)
v = Vehicle('6')
print(v)
# Confirm that we go through our setter method
v.wheels = '123'
assert v.wheels == 123
The implementation of the dataclass_property_support metaclass is mostly straightforward and can be found in the answer I added on the linked SO article.