siuba icon indicating copy to clipboard operation
siuba copied to clipboard

Implement tidyr's fill

Open machow opened this issue 3 years ago • 0 comments

This is based on the pandas issue posed in this tweet. Here's a quick version of `fill`.

from pandas.core.groupby import DataFrameGroupBy
from pandas import DataFrame

from siuba.dply.verbs import singledispatch2

@singledispatch2((DataFrame, DataFrameGroupBy))
def fill(__data, *args, direction = "down"):
    """Fill missing values in the selected columns from neighbouring rows.

    Parameters
    ----------
    __data:
        A DataFrame, or a grouped DataFrame. For grouped data the fill is
        delegated to the groupby object's ``ffill`` / ``bfill``.
    *args:
        Names of the columns to fill. With no names, an unchanged copy of
        the data is returned.
    direction:
        ``"down"`` fills using ``ffill`` (the default); ``"up"`` fills using
        ``bfill``.

    Returns
    -------
    DataFrame
        A copy of the underlying (ungrouped) DataFrame with the selected
        columns filled. The input is not modified.

    Raises
    ------
    ValueError
        If ``direction`` is neither ``"down"`` nor ``"up"``.
    """
    # Validate up front: otherwise an unknown direction leaves the filled
    # result unbound and surfaces later as an opaque UnboundLocalError.
    if direction not in ("down", "up"):
        raise ValueError(
            "direction must be 'down' or 'up', got {!r}".format(direction)
        )

    col_names = list(args)

    # if data is grouped, get DataFrame out and copy
    new_df = getattr(__data, "obj", __data).copy()

    # Nothing selected means nothing to fill.
    if not col_names:
        return new_df

    to_fill_cols = __data[col_names]

    if direction == "down":
        filled = to_fill_cols.ffill()
    else:
        filled = to_fill_cols.bfill()

    new_df[col_names] = filled
    return new_df

To complete this, the function basically just needs to get the name of any symbols passed as arguments, via

col_names = list(map(simple_varname, args))

Then add the function to siuba.dply.verbs.

Example

import numpy as np, pandas as pd
from siuba import _

# Example data, to be compared against group-and-fill.R.
# Task: within each group, fill specific columns down, following the `order` column.
df = pd.DataFrame(
    [
        ["a", 0, 1, 1, "hello"],
        ["a", 2, np.nan, 3, np.nan],  # deliberately listed out of order
        ["a", 1, 6, np.nan, "world"],
        ["b", 0, 2, 7, "foo"],
        ["b", 1, np.nan, 10, "bar"],
    ],
    columns=["group", "order", "attribute_1", "attribute_2", "irrelevant"],
)

from siuba import _, arrange, group_by

# Sort rows by group and order, group by `group`, then fill the two
# attribute columns (fill's default direction is "down").
sorted_df = df >> arrange(_.group, _.order)
grouped_df = sorted_df >> group_by("group")
grouped_df >> fill("attribute_1", "attribute_2")

machow avatar Aug 11 '20 18:08 machow