clumper
clumper copied to clipboard
Grouping by two columns mixes up the keys
Grouping by two columns mixes the keys up, as the example below shows. The aggregated output contains a dict with 'grp_1': 'b', 'grp_2': 'c', but that combination of keys is not present in the original list_dicts data.
from clumper import Clumper
list_dicts = [
{'grp_1': 'a', 'grp_2': 'a', 'a': 6},
{'grp_1': 'a', 'grp_2': 'b', 'a': 7},
{'grp_1': 'a', 'grp_2': 'c', 'a': 5},
{'grp_1': 'b', 'grp_2': 'a', 'a': 2},
{'grp_1': 'b', 'grp_2': 'b', 'a': 4},
]
(Clumper(list_dicts)
.group_by('grp_1', 'grp_2')
.agg(c=('a', 'count'),
s=('a', 'sum'),
m=('a', 'mean'))
.collect()
)
# output
[{'grp_1': 'b', 'grp_2': 'b', 'c': 1, 's': 4, 'm': 4},
{'grp_1': 'b', 'grp_2': 'a', 'c': 1, 's': 7, 'm': 7},
{'grp_1': 'b', 'grp_2': 'c', 'c': 1, 's': 2, 'm': 2}, # this key combination is not present in list_dicts
{'grp_1': 'a', 'grp_2': 'b', 'c': 1, 's': 5, 'm': 5},
{'grp_1': 'a', 'grp_2': 'c', 'c': 1, 's': 6, 'm': 6}] # 'grp_1': 'a', 'grp_2': 'a' is missing here
Environment: Python 3.9.7, clumper 0.2.15