siuba
siuba copied to clipboard
Implement tidyr's fill
Based on the pandas issue posed in this tweet. Here's a quick version of fill.
from pandas.core.groupby import DataFrameGroupBy
from pandas import DataFrame
from siuba.dply.verbs import singledispatch2
@singledispatch2((DataFrame, DataFrameGroupBy))
def fill(__data, *args, direction = "down"):
    """Fill missing values in the selected columns from neighbouring rows.

    Args:
        __data: A pandas ``DataFrame`` or ``DataFrameGroupBy``. For grouped
            input the fill is delegated to the groupby object's
            ``ffill``/``bfill``.
        *args: Names of the columns to fill, given as column labels. Symbol
            expressions are not resolved to names here.
        direction: ``"down"`` (default) uses ``ffill``; ``"up"`` uses
            ``bfill``.

    Returns:
        A copy of the underlying ``DataFrame`` (``__data.obj`` when the input
        is grouped) with the selected columns replaced by their filled
        versions. The input is not modified, and the result is not grouped.

    Raises:
        ValueError: If ``direction`` is neither ``"down"`` nor ``"up"``.
    """
    # Validate first: previously an unknown direction skipped both branches
    # and surfaced later as an UnboundLocalError on ``filled``.
    if direction not in ("down", "up"):
        raise ValueError(
            "direction must be 'down' or 'up', got {!r}".format(direction)
        )

    col_names = list(args)

    # if data is grouped, get DataFrame out and copy
    new_df = getattr(__data, "obj", __data).copy()

    # Nothing selected means nothing to fill; hand back the untouched copy.
    if not col_names:
        return new_df

    to_fill_cols = __data[col_names]
    if direction == "down":
        filled = to_fill_cols.ffill()
    else:
        filled = to_fill_cols.bfill()

    new_df[col_names] = filled
    return new_df
To complete this, fill just needs to extract the name of any symbols passed as arguments, via
col_names = list(map(simple_varname, args))
Then the function needs to be added to siuba.dply.verbs.
Example
import numpy as np
import pandas as pd
from siuba import _, arrange, group_by

# code to be compared to group-and-fill.R
# task: fill specific columns down, within each group, ordering by the order.

# Small demo frame: two groups, with gaps in both attribute columns.
df = pd.DataFrame(
    [
        ["a", 0, 1, 1, "hello"],
        ["a", 2, np.nan, 3, np.nan],  # this row is deliberately out of order
        ["a", 1, 6, np.nan, "world"],
        ["b", 0, 2, 7, "foo"],
        ["b", 1, np.nan, 10, "bar"],
    ],
    columns=["group", "order", "attribute_1", "attribute_2", "irrelevant"],
)

# Sort rows by group and order, group by "group", then fill the two
# attribute columns downward.
(
    df
    >> arrange(_.group, _.order)
    >> group_by("group")
    >> fill("attribute_1", "attribute_2")
)