arkouda
arkouda copied to clipboard
DataFrame.groupby.first to match pandas
def build_ak_df_with_nans():
    """Build a small Arkouda DataFrame whose numeric columns contain NaNs.

    The frame has four rows and six columns: two string key columns
    (``key1``, ``key2``), two numeric columns with missing values
    (``nums1``, ``nums2``), one fully populated integer column
    (``nums3``) and one boolean column (``bools``).

    Returns:
        The ``ak.DataFrame`` built from those columns, in the order above.
    """
    raw_columns = {
        "key1": ["valuew", "valuex", "valuew", "valuex"],
        "key2": ["valueA", "valueB", "valueA", "valueB"],
        "nums1": [1, np.nan, 3, 4],
        "nums2": [1, np.nan, np.nan, 7],
        "nums3": [10, 8, 9, 7],
        "bools": [True, False, True, False],
    }

    # Convert each plain Python list to an Arkouda array, keeping column order.
    ak_columns = {}
    for name, values in raw_columns.items():
        ak_columns[name] = ak.array(values)

    return ak.DataFrame(ak_columns)
# Build the Arkouda frame used in the comparison; without this assignment
# `df` is undefined and the reproduction fails with a NameError.
df = build_ak_df_with_nans()
# Drop the boolean column in place so only the key and numeric columns remain.
df.drop('bools', axis=1, inplace=True)
# Convert to a pandas DataFrame so the same groupby can be run in pandas.
pd_df = df.to_pandas()
Arkouda returns the first row of each group, including any NaN values in that row:
In [143]: df.groupby(["key1","key2"]).first()
Out[143]:
key1 key2 nums1 nums2 nums3
0 valuew valueA 1.0 1.0 10
1 valuex valueB NaN NaN 8
(2 rows x 5 columns)
Pandas gives a different result, returning the first non-null value of each column within each group:
In [144]: pd_df.groupby(["key1","key2"], as_index=False).first()
Out[144]:
key1 key2 nums1 nums2 nums3
0 valuew valueA 1.0 1.0 10
1 valuex valueB 4.0 7.0 8