xcdat
xcdat copied to clipboard
[Bug]: Improve error message when `temporal.departures()` `reference_period` arg is invalid
What happened?
The error message when the reference_period arg is not valid is not clear and can confuse the end-user (AttributeError: 'IndexVariable' object has no attribute 'month').
What did you expect to happen? Are there any possible answers you came across?
There should be logic to detect whether there are time coordinates that cover the entire reference period. If there are missing time coordinates, raise an error early on about this (e.g., "Invalid reference_period selected. The time coordinates for this dataset span <START, END>.").
Minimal Complete Verifiable Example (MCVE)
# %%
# Reproduction script: builds a small dataset with three yearly time
# coordinates (2000-2002) and then requests departures against a reference
# period that lies outside of those coordinates.
import cftime
import numpy as np
import xarray as xr
import xcdat as xc
# Time axis: one cftime Gregorian timestamp per year (July 1st, 12:00) for
# 2000, 2001 and 2002. The "bounds" attribute names the "time_bnds" variable
# defined below.
time_yearly = xr.DataArray(
data=np.array(
[
cftime.DatetimeGregorian(2000, 7, 1, 12, 0, 0, 0, has_year_zero=False),
cftime.DatetimeGregorian(2001, 7, 1, 12, 0, 0, 0, has_year_zero=False),
cftime.DatetimeGregorian(2002, 7, 1, 12, 0, 0, 0, has_year_zero=False),
],
dtype=object,
),
dims=["time"],
attrs={
"axis": "T",
"long_name": "time",
"standard_name": "time",
"bounds": "time_bnds",
},
)
# Time bounds: one [start, end] pair per time coordinate, each running from
# January 1st of the year to January 1st of the following year.
time_bnds_yearly = xr.DataArray(
name="time_bnds",
data=np.array(
[
[
cftime.DatetimeGregorian(2000, 1, 1, 0, 0, 0, 0, has_year_zero=False),
cftime.DatetimeGregorian(2001, 1, 1, 0, 0, 0, 0, has_year_zero=False),
],
[
cftime.DatetimeGregorian(2001, 1, 1, 0, 0, 0, 0, has_year_zero=False),
cftime.DatetimeGregorian(2002, 1, 1, 0, 0, 0, 0, has_year_zero=False),
],
[
cftime.DatetimeGregorian(2002, 1, 1, 0, 0, 0, 0, has_year_zero=False),
cftime.DatetimeGregorian(2003, 1, 1, 0, 0, 0, 0, has_year_zero=False),
],
],
dtype=object,
),
dims=["time", "bnds"],
attrs={
"xcdat_bounds": "True",
},
)
# Latitude axis with four points (degrees north).
lat = xr.DataArray(
data=np.array([-90, -88.75, 88.75, 90]),
dims=["lat"],
attrs={"units": "degrees_north", "axis": "Y", "standard_name": "latitude"},
)
# Longitude axis with four points (degrees east).
lon = xr.DataArray(
data=np.array([0, 1.875, 356.25, 358.125]),
dims=["lon"],
attrs={"units": "degrees_east", "axis": "X", "standard_name": "longitude"},
)
# Data variable "ts": all ones, shaped (time=3, lat=4, lon=4).
ts_decoded = xr.DataArray(
name="ts",
data=np.ones((3, 4, 4)),
coords={"time": time_yearly, "lat": lat, "lon": lon},
dims=["time", "lat", "lon"],
)
# Assemble the dataset from the data variable, the time bounds, and the
# coordinates defined above.
ds = xr.Dataset(
data_vars={"ts": ts_decoded, "time_bnds": time_bnds_yearly},
coords={"lat": lat.copy(), "lon": lon.copy(), "time": time_yearly},
)
# The cell below triggers the reported failure: the reference period
# (2009-01-01 to 2010-01-01) falls outside the dataset's 2000-2002 time
# coordinates, and the call ends in
# "AttributeError: 'IndexVariable' object has no attribute 'month'"
# (see the log output in this report).
# %%
ds = ds.temporal.departures(
"ts", freq="month", reference_period=("2009-01-01", "2010-01-01")
)
Relevant log output
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/miniconda3/envs/xcdat_scipy_2024/lib/python3.11/site-packages/xarray/core/dataarray.py:863, in DataArray._getitem_coord(self, key)
862 try:
--> 863 var = self._coords[key]
864 except KeyError:
KeyError: 'time.month'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-9-03cd046f6c77> in ?()
1 # %%
----> 2 ds = ds.temporal.departures(
3 "ts", freq="month", reference_period=("2009-01-01", "2010-01-01")
4 )
~/repositories/xcdat/xcdat/temporal.py in ?(self, data_var, freq, weighted, keep_weights, reference_period, season_config)
766 keep_weights,
767 season_config,
768 )
769
--> 770 ds_climo = ds.temporal.climatology(
771 data_var,
772 freq,
773 weighted,
~/repositories/xcdat/xcdat/temporal.py in ?(self, data_var, freq, weighted, keep_weights, reference_period, season_config)
567 }
568 """
569 self._set_data_var_attrs(data_var)
570
--> 571 return self._averager(
572 data_var,
573 "climatology",
574 freq,
~/repositories/xcdat/xcdat/temporal.py in ?(self, data_var, mode, freq, weighted, keep_weights, reference_period, season_config)
834
835 if self._mode == "average":
836 dv_avg = self._average(ds, data_var)
837 elif self._mode in ["group_average", "climatology", "departures"]:
--> 838 dv_avg = self._group_average(ds, data_var)
839
840 # The original time dimension is dropped from the dataset because
841 # it becomes obsolete after the data variable is averaged. When the
~/repositories/xcdat/xcdat/temporal.py in ?(self, ds, data_var)
1194 dv = _get_data_var(ds, data_var)
1195
1196 # Label the time coordinates for grouping weights and the data variable
1197 # values.
-> 1198 self._labeled_time = self._label_time_coords(dv[self.dim])
1199
1200 if self._weighted:
1201 time_bounds = ds.bounds.get_bounds("T", var_key=data_var)
~/repositories/xcdat/xcdat/temporal.py in ?(self, time_coords)
1369 >>> dtype='datetime64[ns]')
1370 >>> Coordinates:
1371 >>> * time (time) datetime64[ns] 2000-01-01T00:00:00 ... 2000-04-01T00:00:00
1372 """
-> 1373 df_dt_components: pd.DataFrame = self._get_df_dt_components(time_coords)
1374 dt_objects = self._convert_df_to_dt(df_dt_components)
1375
1376 time_grouped = xr.DataArray(
~/repositories/xcdat/xcdat/temporal.py in ?(self, time_coords)
1423
1424 # Use the TIME_GROUPS dictionary to determine which components
1425 # are needed to form the labeled time coordinates.
1426 for component in TIME_GROUPS[self._mode][self._freq]:
-> 1427 df[component] = time_coords[f"{self.dim}.{component}"].values
1428
1429 # The season frequency requires additional datetime components for
1430 # processing, which are later removed before time coordinates are
/opt/miniconda3/envs/xcdat_scipy_2024/lib/python3.11/site-packages/xarray/core/dataarray.py in ?(self, key)
870 def __getitem__(self, key: Any) -> Self:
871 if isinstance(key, str):
--> 872 return self._getitem_coord(key)
873 else:
874 # xarray-style array indexing
875 return self.isel(indexers=self._item_key_to_dict(key))
/opt/miniconda3/envs/xcdat_scipy_2024/lib/python3.11/site-packages/xarray/core/dataarray.py in ?(self, key)
862 try:
863 var = self._coords[key]
864 except KeyError:
865 dim_sizes = dict(zip(self.dims, self.shape))
--> 866 _, key, var = _get_virtual_variable(self._coords, key, dim_sizes)
867
868 return self._replace_maybe_drop_dims(var, name=key)
...
219 virtual_var = Variable(ref_var.dims, data)
220
221 return ref_name, var_name, virtual_var
AttributeError: 'IndexVariable' object has no attribute 'month'
Anything else we need to know?
No response
Environment
Latest version of main and stable version (0.7.0)