arkouda icon indicating copy to clipboard operation
arkouda copied to clipboard

DataFrame aggregations to handle non-numeric types

Open ajpotts opened this issue 10 months ago • 0 comments

def build_ak_df_with_nans():
    """Return a small Arkouda DataFrame fixture containing NaN values.

    The frame has two string key columns (``key1``, ``key2``), three
    numeric columns (``nums1`` and ``nums2`` contain ``np.nan`` entries,
    ``nums3`` does not) and one boolean column (``bools``). Each Python
    list is converted with ``ak.array`` before the frame is built.
    """
    raw_columns = {
        "key1": ["valuew", "valuex", "valuew", "valuex"],
        "key2": ["valueA", "valueB", "valueA", "valueB"],
        "nums1": [1, np.nan, 3, 4],
        "nums2": [1, np.nan, np.nan, 7],
        "nums3": [10, 8, 9, 7],
        "bools": [True, False, True, False],
    }

    # Convert column by column, keeping the insertion order of the literal.
    ak_columns = {}
    for col_name, col_values in raw_columns.items():
        ak_columns[col_name] = ak.array(col_values)

    return ak.DataFrame(ak_columns)

# Build the Arkouda frame and convert it to pandas so both libraries
# operate on the same data.
df = build_ak_df_with_nans()
pd_df = df.to_pandas()


# Run the same groupby/count on the (key1, key2) pairs in each library.
# as_index=False keeps the pandas group keys as ordinary columns.
pd_result = pd_df.groupby(["key1", "key2"], as_index=False).count()
ak_result = df.groupby(["key1", "key2"]).count()

Pandas

In [163]: pd_result
Out[163]: 
     key1    key2  nums1  nums2  nums3  bools
0  valuew  valueA      2      1      2      2
1  valuex  valueB      1      1      2      2

Arkouda

In [164]: ak_result
Out[164]: 
     key1    key2  nums1  nums2  nums3
0  valuew  valueA      2      1      2
1  valuex  valueB      1      1      2 (2 rows x 5 columns)

ajpotts avatar Apr 24 '24 18:04 ajpotts