flox icon indicating copy to clipboard operation
flox copied to clipboard

Error when data variables have different dimensions

Open max-sixty opened this issue 9 months ago • 2 comments

Very possibly I'm mistaken; or is this not supported?

ds = xr.tutorial.load_dataset('air_temperature')

ds['air2'] = ds['air'].sum('lat')

ds.groupby('lon').count(...)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[9], line 1
----> 1 ds.groupby('lon').count(...)

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/_aggregations.py:2418, in DatasetGroupByAggregations.count(self, dim, keep_attrs, **kwargs)
   2344 """
   2345 Reduce this Dataset's data by applying ``count`` along some dimension(s).
   2346
   (...)
   2411     da       (labels) int64 1 2 2
   2412 """
   2413 if (
   2414     flox_available
   2415     and OPTIONS["use_flox"]
   2416     and contains_only_chunked_or_numpy(self._obj)
   2417 ):
-> 2418     return self._flox_reduce(
   2419         func="count",
   2420         dim=dim,
   2421         numeric_only=False,
   2422         # fill_value=fill_value,
   2423         keep_attrs=keep_attrs,
   2424         **kwargs,
   2425     )
   2426 else:
   2427     return self.reduce(
   2428         duck_array_ops.count,
   2429         dim=dim,
   (...)
   2432         **kwargs,
   2433     )


File /opt/homebrew/lib/python3.9/site-packages/xarray/core/groupby.py:1034, in GroupBy._flox_reduce(self, dim, keep_attrs, **kwargs)
   1031     kwargs.setdefault("min_count", 1)
   1033 output_index = grouper.full_index
-> 1034 result = xarray_reduce(
   1035     obj.drop_vars(non_numeric.keys()),
   1036     self._codes,
   1037     dim=parsed_dim,
   1038     # pass RangeIndex as a hint to flox that `by` is already factorized
   1039     expected_groups=(pd.RangeIndex(len(output_index)),),
   1040     isbin=False,
   1041     keep_attrs=keep_attrs,
   1042     **kwargs,
   1043 )
   1045 # we did end up reducing over dimension(s) that are
   1046 # in the grouped variable
   1047 group_dims = grouper.group.dims

File /opt/homebrew/lib/python3.9/site-packages/flox/xarray.py:415, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, fill_value, dtype, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
    413 output_core_dims = [d for d in input_core_dims[0] if d not in dim_tuple]
    414 output_core_dims.extend(group_names)
--> 415 actual = xr.apply_ufunc(
    416     wrapper,
    417     ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
    418     *by_da,
    419     input_core_dims=input_core_dims,
    420     # for xarray's test_groupby_duplicate_coordinate_labels
    421     exclude_dims=set(dim_tuple),
    422     output_core_dims=[output_core_dims],
    423     dask="allowed",
    424     dask_gufunc_kwargs=dict(
    425         output_sizes=group_sizes, output_dtypes=[dtype] if dtype is not None else None
    426     ),
    427     keep_attrs=keep_attrs,
    428     kwargs={
    429         "func": func,
    430         "axis": axis,
    431         "sort": sort,
    432         "fill_value": fill_value,
    433         "method": method,
    434         "min_count": min_count,
    435         "skipna": skipna,
    436         "engine": engine,
    437         "reindex": reindex,
    438         "expected_groups": tuple(expected_groups_valid_list),
    439         "isbin": isbins,
    440         "finalize_kwargs": finalize_kwargs,
    441         "dtype": dtype,
    442         "core_dims": input_core_dims,
    443     },
    444 )
    446 # restore non-dim coord variables without the core dimension
    447 # TODO: shouldn't apply_ufunc handle this?
    448 for var in set(ds_broad._coord_names) - set(ds_broad._indexes) - set(ds_broad.dims):

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/computation.py:1249, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, on_missing_core_dim, *args)
   1247 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1248 elif any(is_dict_like(a) for a in args):
-> 1249     return apply_dataset_vfunc(
   1250         variables_vfunc,
   1251         *args,
   1252         signature=signature,
   1253         join=join,
   1254         exclude_dims=exclude_dims,
   1255         dataset_join=dataset_join,
   1256         fill_value=dataset_fill_value,
   1257         keep_attrs=keep_attrs,
   1258         on_missing_core_dim=on_missing_core_dim,
   1259     )
   1260 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
   1261 elif any(isinstance(a, DataArray) for a in args):

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/computation.py:530, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, on_missing_core_dim, *args)
    525 list_of_coords, list_of_indexes = build_output_coords_and_indexes(
    526     args, signature, exclude_dims, combine_attrs=keep_attrs
    527 )
    528 args = tuple(getattr(arg, "data_vars", arg) for arg in args)
--> 530 result_vars = apply_dict_of_variables_vfunc(
    531     func,
    532     *args,
    533     signature=signature,
    534     join=dataset_join,
    535     fill_value=fill_value,
    536     on_missing_core_dim=on_missing_core_dim,
    537 )
    539 out: Dataset | tuple[Dataset, ...]
    540 if signature.num_outputs > 1:

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/computation.py:457, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, on_missing_core_dim, *args)
    455 else:
    456     if on_missing_core_dim == "raise":
--> 457         raise ValueError(core_dim_present)
    458     elif on_missing_core_dim == "copy":
    459         result_vars[name] = variable_args[0]

ValueError: Missing core dims {'lat'} from arg number 1 on a variable named `air2`:
<xarray.Variable (time: 2920, lon: 53)>
array([[6984.9497, 6991.6606, 6991.5303, ..., 6998.77  , 7007.8804,
        7016.5605],
       [6976.4307, 6988.45  , 6993.2407, ..., 6994.3906, 7006.7505,
        7019.941 ],
       [6975.2603, 6982.02  , 6988.77  , ..., 6992.0503, 7004.9404,
        7020.3506],
       ...,
       [6990.7505, 6998.3496, 7013.3496, ..., 6995.05  , 7008.6504,
        7019.4497],
       [6984.95  , 6991.6504, 7007.949 , ..., 6994.15  , 7008.55  ,
        7020.8506],
       [6981.75  , 6983.85  , 6997.0503, ..., 6985.6494, 6999.2495,
        7012.0493]], dtype=float32)

max-sixty avatar Nov 04 '23 22:11 max-sixty

Yuck, it should work. I tried to handle it here, but I never use `...` (Ellipsis) as the `dim` argument, so this code path is probably not well tested.

https://github.com/xarray-contrib/flox/blob/c15572ea416236d6fdf4c9b4caeb91962c2c6a82/flox/xarray.py#L397-L406

I would appreciate any help fixing it :)

dcherian avatar Nov 05 '23 02:11 dcherian

No great stress! Easy to turn off for one calc.

> I would appreciate any help fixing it :)

(just to set expectations — I'm way, way over my budget for contributing to projects vs. getting work done, so it's quite unlikely I'll get to this any time soon)

max-sixty avatar Nov 05 '23 19:11 max-sixty