cattrs icon indicating copy to clipboard operation
cattrs copied to clipboard

How to deserialize multidict to data model?

Open wencan opened this issue 2 years ago • 6 comments

multidict: https://github.com/aio-libs/multidict

import dataclasses
import typing
import cattrs
import multidict


@dataclasses.dataclass
class Model:
    """Data model that the example structures its input into."""

    # Annotated as a list of strings.
    a: typing.List[str]
    # Annotated as a single string.
    b: str


# Structure two inputs in turn: first with key 'a' repeated, then with every
# key present only once.
for pairs in (
    [('a', '111'), ('b', '2'), ('a', '333')],
    [('a', '111'), ('b', '2')],
):
    d = multidict.MultiDict(pairs)
    obj = cattrs.structure(d, Model)
    print(obj)

want:

Model(a=['111', '333'], b='2')
Model(a=['111'], b='2')

actual:

Model(a=['1', '1', '1'], b='2')
Model(a=['1', '1', '1'], b='2')

wencan avatar Aug 04 '23 10:08 wencan

That's not surprising: MultiDict[key] returns only the first value stored for the key.

The simplest solution is to preprocess the multidict into a normal dictionary with potential lists for values.

import dataclasses
import typing

import multidict

import cattrs


@dataclasses.dataclass
class Model:
    """Data model that the example structures its input into."""

    # Annotated as a list of strings.
    a: typing.List[str]
    # Annotated as a single string.
    b: str


def md_to_dict(d: multidict.MultiDict) -> dict:
    """Flatten a ``MultiDict`` into a plain ``dict``.

    A key that holds more than one value maps to the list returned by
    ``d.getall(key)``; a key that holds exactly one value maps to that bare
    value, not to a one-element list.
    """
    flattened = {}
    for key in d:
        values = d.getall(key)
        if len(values) > 1:
            flattened[key] = values
        else:
            flattened[key] = values[0]
    return flattened


# Key "a" appears twice, so md_to_dict turns it into a list before structuring.
d = multidict.MultiDict([("a", "111"), ("b", "2"), ("a", "333")])
plain = md_to_dict(d)
obj = cattrs.structure(plain, Model)
print(obj)

Tinche avatar Aug 04 '23 14:08 Tinche

@Tinche My current solution is the same as yours. Please try the following example:

# Every key appears once here, so md_to_dict hands "a" over as a bare string.
d = multidict.MultiDict([("a", "111"), ("b", "2")])
plain = md_to_dict(d)
obj = cattrs.structure(plain, Model)
print(obj)

It will output:

Model(a=['1', '1', '1'], b='2')

wencan avatar Aug 04 '23 14:08 wencan

Ah yeah, I see.

The issue is that "111" is actually a valid input for list[str], because in Python a string is itself a sequence of strings (its characters). You could tighten the validation, but that wouldn't help you: it would just raise an error instead of doing what you want.

This is doable but tricky. Let me think about it.

Tinche avatar Aug 04 '23 15:08 Tinche

@Tinche

It seems that register_structure_hook_factory can solve this problem. Please review my code:

import dataclasses
import datetime
import typing
import functools
import cattrs
import cattrs.gen
import multidict


@dataclasses.dataclass
class Model:
    """Model whose multi-valued fields are variable-length tuples.

    ``a``, ``b`` and ``d`` are homogeneous tuples; ``c`` is a single string.
    ``d`` is optional and defaults to a one-element tuple holding the current
    local time.
    """

    a: tuple[str, ...]  # or typing.Tuple[str, ...]
    b: tuple[int, ...]
    c: str
    # The factory must return a tuple to match the annotation; passing
    # ``datetime.datetime.now`` directly would default ``d`` to a bare datetime.
    d: tuple[datetime.datetime, ...] = dataclasses.field(
        default_factory=lambda: (datetime.datetime.now(),))


def _structure_datetime(value, _):
    """Parse an ISO-format string into a ``datetime.datetime``."""
    return datetime.datetime.fromisoformat(value)


# Dedicated converter so the custom hooks do not touch the global one.
converter = cattrs.Converter()
converter.register_structure_hook(datetime.datetime, _structure_datetime)


def make_multidict_structure_fn(cls):
    """Return a structure hook that always produces a tuple.

    The returned hook structures each element with the module-level
    ``converter`` against the first type argument of the target type. A list
    input is structured element by element; any other input is treated as a
    single element and yields a one-element tuple.
    """
    def structure(data, cls):
        # Wrap a lone value so both input shapes share one code path.
        items = data if isinstance(data, list) else (data,)
        return tuple(
            converter.structure(item, typing.get_args(cls)[0]) for item in items
        )
    return structure


def _is_tuple_type(cls):
    """Return True when ``cls`` is a parametrized tuple type."""
    return typing.get_origin(cls) is tuple


# Every parametrized tuple type gets its hook from make_multidict_structure_fn.
converter.register_structure_hook_factory(
    _is_tuple_type, make_multidict_structure_fn
)


def multidict_to_dict(d: multidict.MultiDict) -> dict:
    """Convert a ``MultiDict`` to a plain ``dict``.

    Keys with several values map to the list from ``d.getall(key)``; keys
    with a single value map to that value itself.
    """
    result = {}
    for key in d:
        found = d.getall(key)
        result[key] = found[0] if len(found) <= 1 else found
    return result


# (key, value) pairs for each demo run; the "->" comments are the outputs the
# original author reported.
cases = [
    # Every key appears once.
    # -> Model(a=('111',), b=(222,), c='abc', d=(datetime.datetime(2023, 8, 14, 8, 54, 35, 55481, tzinfo=datetime.timezone.utc),))
    [('a', '111'), ('b', '222'), ('c', 'abc'), ('d', '2023-08-14T08:54:35.055481+00:00')],
    # 'a' is repeated and 'd' is absent, so 'd' falls back to the dataclass default.
    # -> a=('111', '333'), b=(222,), c='abc'
    [('a', '111'), ('a', '333'), ('b', '222'), ('c', 'abc')],
    # 'a', 'b' and 'd' are all repeated.
    # -> Model(a=('111', '333'), b=(222, 555), c='abc', d=(datetime.datetime(2023, 8, 14, 8, 54, 35, 55481, tzinfo=datetime.timezone.utc), datetime.datetime(2023, 8, 16, 8, 54, 35, 55481, tzinfo=datetime.timezone.utc)))
    [('a', '111'), ('b', '222'), ('b', '555'), ('a', '333'), ('c', 'abc'), ('d', '2023-08-14T08:54:35.055481+00:00'), ('d', '2023-08-16T08:54:35.055481+00:00')],
]

for pairs in cases:
    d = multidict.MultiDict(pairs)
    obj = converter.structure(multidict_to_dict(d), Model)
    print(obj)

wencan avatar Aug 11 '23 09:08 wencan

I see you're overriding hooks for tuples, and sure, that's a fine approach.

For my approach, I tried something different: I overrode the hooks for dataclasses.

Here's your example with my approach (I added some type hints for my convenience):

import dataclasses
import datetime
import typing
from collections.abc import Callable

import multidict

import cattrs
import cattrs.gen


@dataclasses.dataclass
class Model:
    """Model whose multi-valued fields are variable-length tuples.

    ``a``, ``b`` and ``d`` are homogeneous tuples; ``c`` is a single string.
    ``d`` is optional and defaults to a one-element tuple holding the current
    local time.
    """

    a: tuple[str, ...]  # or typing.Tuple[str, ...]
    b: tuple[int, ...]
    c: str
    # The factory must return a tuple to match the annotation; passing
    # ``datetime.datetime.now`` directly would default ``d`` to a bare datetime.
    d: tuple[datetime.datetime, ...] = dataclasses.field(
        default_factory=lambda: (datetime.datetime.now(),)
    )


# Dedicated converter; datetimes are structured from ISO-format strings.
converter = cattrs.Converter()
_parse_iso = datetime.datetime.fromisoformat
converter.register_structure_hook(
    datetime.datetime, lambda raw, _type: _parse_iso(raw)
)


def dataclass_hook_factory(
    cl: type,
) -> Callable[[multidict.MultiDict, typing.Any], typing.Any]:
    """Build a structure hook that feeds a ``MultiDict`` into dataclass ``cl``.

    Fields of ``cl`` annotated as a parametrized ``tuple`` or ``list`` receive
    every value stored under their key (``MultiDict.getall``). All other
    fields receive a single value; if such a key is repeated, the value seen
    last during iteration wins. The remapped plain dict is then passed to the
    hook generated by ``cattrs.gen.make_dict_structure_fn``.

    Args:
        cl: The dataclass type to generate a hook for.

    Returns:
        A ``(value, type)`` structure hook for the module-level ``converter``.
    """
    # Handling ``list`` as well as ``tuple`` keeps a single string value from
    # being structured character by character. A set gives O(1) membership.
    seq_fields = {
        f.name
        for f in dataclasses.fields(cl)
        if typing.get_origin(f.type) in (tuple, list)
    }

    orig_fn = cattrs.gen.make_dict_structure_fn(cl, converter)

    def structure_dc(val: multidict.MultiDict, _: typing.Any):
        mapped_val = {}
        for k, v in val.items():
            if k not in seq_fields:
                mapped_val[k] = v
            elif k not in mapped_val:
                # getall() already returns every value for the key, so one
                # call per key is enough even when the key is repeated.
                mapped_val[k] = val.getall(k)
        return orig_fn(mapped_val, _)

    return structure_dc


# Use dataclass_hook_factory to build the structure hook for any type that
# dataclasses.is_dataclass accepts.
converter.register_structure_hook_factory(
    dataclasses.is_dataclass, dataclass_hook_factory
)

## Tests

# (key, value) pairs for each demo run; the MultiDict is passed to the
# converter directly, with no pre-flattening step.
cases = [
    # Every key appears once.
    [
        ("a", "111"),
        ("b", "222"),
        ("c", "abc"),
        ("d", "2023-08-14T08:54:35.055481+00:00"),
    ],
    # "a" is repeated and "d" is absent, so "d" falls back to the dataclass
    # default. Reported result: a=('111', '333'), b=(222,), c='abc'.
    [("a", "111"), ("a", "333"), ("b", "222"), ("c", "abc")],
    # "a", "b" and "d" are all repeated.
    [
        ("a", "111"),
        ("b", "222"),
        ("b", "555"),
        ("a", "333"),
        ("c", "abc"),
        ("d", "2023-08-14T08:54:35.055481+00:00"),
        ("d", "2023-08-16T08:54:35.055481+00:00"),
    ],
]

for pairs in cases:
    d = multidict.MultiDict(pairs)
    obj = converter.structure(d, Model)
    print(obj)

One nice thing about my approach is that it can be configured to handle lists too, I believe.

Tinche avatar Aug 14 '23 18:08 Tinche