arkouda
arkouda copied to clipboard
Extend DataFrame aggregations to handle non-numeric types
def build_ak_df_with_nans():
    """Build a small Arkouda DataFrame for exercising groupby aggregations.

    The frame has two string key columns ("key1", "key2"), three numeric
    columns ("nums1" and "nums2" contain NaN entries, "nums3" does not),
    and one boolean column ("bools").  Each Python list is passed through
    ``ak.array`` before being handed to ``ak.DataFrame``.

    Returns:
        The ``ak.DataFrame`` built from the converted columns.
    """
    raw_columns = {
        "key1": ["valuew", "valuex", "valuew", "valuex"],
        "key2": ["valueA", "valueB", "valueA", "valueB"],
        "nums1": [1, np.nan, 3, 4],
        "nums2": [1, np.nan, np.nan, 7],
        "nums3": [10, 8, 9, 7],
        "bools": [True, False, True, False],
    }

    # Convert column by column, preserving the insertion order above.
    converted = {}
    for name, values in raw_columns.items():
        converted[name] = ak.array(values)

    return ak.DataFrame(converted)
# Build the Arkouda frame and a pandas copy of the same data.
df = build_ak_df_with_nans()
pd_df = df.to_pandas()

# Group both frames on the same pair of key columns and count per group,
# so the two results can be compared side by side.
group_keys = ["key1", "key2"]

pd_grouped = pd_df.groupby(group_keys, as_index=False)
pd_result = pd_grouped.count()

ak_grouped = df.groupby(group_keys)
ak_result = ak_grouped.count()
Pandas
In [163]: pd_result
Out[163]:
key1 key2 nums1 nums2 nums3 bools
0 valuew valueA 2 1 2 2
1 valuex valueB 1 1 2 2
Arkouda (the boolean `bools` column is missing from the result)
In [164]: ak_result
Out[164]:
key1 key2 nums1 nums2 nums3
0 valuew valueA 2 1 2
1 valuex valueB 1 1 2
(2 rows x 5 columns)