xgcm
xgcm copied to clipboard
[FEATURE] Keep attrs when transforming variables
I would like to keep the attrs of variables transformed with grid.transform.
This is the (WIP) code I am using atm to create this behavior.
import logging
import numpy as np
from xgcm import Grid
logger = logging.getLogger(__name__)
# Set the default target resolution for transforming to altitude.
TARGET_RESOLUTION = np.arange(0, 80000 + 0.1, 100)
def xgcm_transformation(
ds: xr.Dataset,
orig_coord: str = "pressure",
target_coord: str = "altitude",
target_resolution: np.ndarray = TARGET_RESOLUTION,
keep_attrs: bool = True,
manual_method: str = "auto",
) -> xr.Dataset:
"""
Apply a coordinate transformation to the dataset.
Parameters
----------
ds : xr.Dataset
The input dataset.
orig_coord : str, optional
The original vertical coordinate to transform from. Default is "pressure".
target_coord : str, optional
The target vertical coordinate to transform to. Default is "altitude".
target_resolution : np.ndarray, optional
The target resolution to transform to. Default is np.arange(0, 80000, 100).
manual_method : str, optional
The chosen manual method for the transformation. Either lin or log.
Default is auto. If the original pressure coordinate is pressure, it applies log to it first.
If it is altitude no log is applied. (WIP! FIXME)
Returns
-------
xr.Dataset
The transformed dataset.
"""
if manual_method != "auto":
method = manual_method
elif orig_coord == "altitude":
method = "linear"
elif orig_coord == "pressure":
method = "log"
else:
msg = "Method could not be determined"
raise ValueError(msg)
if method == "log":
ds[orig_coord] = np.log(ds[orig_coord])
logger.info(
"Tranform Dataset from %s to %s using %s transform",
orig_coord,
target_coord,
method,
)
# Add orig_coord along target_coord as data variable in the dataset, as xgcm does not output it automatically
if ds[target_coord].chunks:
msg = f"Add new {orig_coord} data variable and chunk it."
logger.info(msg)
ds[f"{orig_coord}_var"] = (
ds[orig_coord]
.broadcast_like(ds[target_coord])
.astype("float32")
.chunk(chunks=ds[target_coord].chunks)
)
else:
msg = f"Add new {orig_coord} data variable."
logger.info(msg)
ds[f"{orig_coord}_var"] = (
ds[orig_coord].broadcast_like(ds[target_coord]).astype("float32")
)
ds[f"{orig_coord}_var"].attrs = ds[orig_coord].attrs
if keep_attrs:
data_var_attrs = {data_var: ds[data_var].attrs for data_var in ds.data_vars}
coord_attrs = {
coord: ds[coord].attrs for coord in ds.coords if coord != orig_coord
}
attrs = data_var_attrs | coord_attrs
grid = Grid(ds, coords={"Z": {"center": orig_coord}}, periodic=False)
transform_vars = [var_ for var_ in ds.data_vars if var_ != target_coord]
transform_list = []
for var_ in transform_vars:
var_transformed = grid.transform(
ds[var_],
"Z",
target_resolution,
target_data=ds[target_coord],
)
transform_list.append(var_transformed)
out = xr.merge(transform_list)
if keep_attrs:
for var_ in attrs:
out[var_].attrs = attrs[var_]
# Rename orig_coord to the original name.
out = out.rename({f"{orig_coord}_var": orig_coord})
if method == "log":
out[orig_coord] = np.exp(out[orig_coord])
return out
Hey @mgorfer thank you so much for getting involved with xgcm more. I think this is a great suggestion. How would you imagine this to happen? By default or with a keyword option like this:
grid.transform(...,keep_attrs=True)
I think we should make an effort to conform to the same behavior across different grid methods (diff, min, transform, etc). There have been some discussions on this for other methods before and I wonder if this would be an additional reason to internally refactor transform to use grid_ufuncs?
Yes! keep_attrs=True would be the best approach in my opinion.
I am not sure about the default setting for this.
In my use case (so far), I always want to keep the attrs.
But I only transform variables like temperature and humidity.
There might be some variables which will not keep the same units after the transformation.
The keep_attrs approach is the same as in the native xarray functions
https://docs.xarray.dev/en/stable/generated/xarray.set_options.html
In xESMF, it is already possible to keep the attrs for the regridding.
It defaults to False if I am not mistaken.
dr_out = regridder(ds, keep_attrs=True)
https://xesmf.readthedocs.io/en/stable/user_api.html#xesmf.frontend.Regridder