iris.coord_categorisation.add_season_membership creates a boolean coordinate that cannot be saved to netcdf
🐛 Bug Report
Saving a cube to netCDF after applying iris.coord_categorisation.add_season_membership to it fails, because the coordinate that function adds is boolean and netCDF has no boolean data type.
How To Reproduce
Running
import iris
from iris import coord_categorisation
from iris.tests import stock
cube = stock.realistic_4d()
coord_categorisation.add_season_membership(cube, "time", "djf")
iris.save(cube, "cube_with_season_membership.nc")
gives the following error from the netcdf4 library (full traceback below)
TypeError: Illegal primitive data type, must be one of dict_keys(['S1', 'i1', 'u1', 'i2', 'u2', 'i4', 'u4', 'i8', 'u8', 'f4', 'f8']), got bool (variable 'season_membership', group '/')
Expected behaviour
It's fairly simple to work around this by converting the coordinate data to integers, but I would expect it to "just work". I can think of two solutions but both have downsides, so I wasn't sure what was best.
- Change add_season_membership to return an integer 1s/0s array. This means the cube never becomes incompatible with netCDF, but might be an issue for people with code expecting a boolean array.
- Change the saving behaviour in iris to convert boolean arrays to integers. This means add_season_membership can work the same but I don't know how you stop the cube becoming inconsistent between saving and reloading.
Additional context
Click to expand this section...
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[14], line 1
----> 1 iris.save(cube, "cube_with_season_membership.nc")
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/io/__init__.py:497, in save(source, target, saver, **kwargs)
495 # Single cube?
496 if isinstance(source, Cube):
--> 497 result = saver(source, target, **kwargs)
499 # CubeList or sequence of cubes?
500 elif isinstance(source, CubeList) or (
501 isinstance(source, (list, tuple))
502 and all([isinstance(i, Cube) for i in source])
503 ):
504 # Only allow cubelist saving for those fileformats that are capable.
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/fileformats/netcdf/saver.py:2876, in save(cube, filename, netcdf_format, local_keys, unlimited_dimensions, zlib, complevel, shuffle, fletcher32, contiguous, chunksizes, endian, least_significant_digit, packing, fill_value, compute)
2873 with Saver(filename, netcdf_format, compute=compute) as sman:
2874 # Iterate through the cubelist.
2875 for cube, packspec, fill_value in zip(cubes, packspecs, fill_values):
-> 2876 sman.write(
2877 cube,
2878 local_keys,
2879 unlimited_dimensions,
2880 zlib,
2881 complevel,
2882 shuffle,
2883 fletcher32,
2884 contiguous,
2885 chunksizes,
2886 endian,
2887 least_significant_digit,
2888 packing=packspec,
2889 fill_value=fill_value,
2890 )
2892 if iris.config.netcdf.conventions_override:
2893 # Set to the default if custom conventions are not available.
2894 conventions = cube.attributes.get(
2895 "Conventions", CF_CONVENTIONS_VERSION
2896 )
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/fileformats/netcdf/saver.py:698, in Saver.write(self, cube, local_keys, unlimited_dimensions, zlib, complevel, shuffle, fletcher32, contiguous, chunksizes, endian, least_significant_digit, packing, fill_value)
694 self._add_dim_coords(cube, cube_dimensions)
696 # Add the auxiliary coordinate variables and associate the data
697 # variable to them
--> 698 self._add_aux_coords(cube, cf_var_cube, cube_dimensions)
700 # Add the cell_measures variables and associate the data
701 # variable to them
702 self._add_cell_measures(cube, cf_var_cube, cube_dimensions)
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/fileformats/netcdf/saver.py:1051, in Saver._add_aux_coords(self, cube, cf_var_cube, dimension_names)
1046 location_coords: MeshNodeCoords | MeshEdgeCoords | MeshFaceCoords = getattr(
1047 mesh, f"{mesh_location}_coords"
1048 )
1049 coords_to_add.extend(list(location_coords))
-> 1051 return self._add_inner_related_vars(
1052 cube,
1053 cf_var_cube,
1054 dimension_names,
1055 coords_to_add,
1056 )
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/fileformats/netcdf/saver.py:999, in Saver._add_inner_related_vars(self, cube, cf_var_cube, dimension_names, coordlike_elements)
996 cf_name = self._name_coord_map.name(element)
997 if cf_name is None:
998 # Not already present : create it
--> 999 cf_name = self._create_generic_cf_array_var(
1000 cube, dimension_names, element
1001 )
1002 self._name_coord_map.append(cf_name, element)
1004 if role_attribute_name == "cell_measures":
1005 # In the case of cell-measures, the attribute entries are not just
1006 # a var_name, but each have the form "<measure>: <varname>".
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/fileformats/netcdf/saver.py:1900, in Saver._create_generic_cf_array_var(self, cube_or_mesh, cube_dim_names, element, element_dims, fill_value)
1897 cf_name = element_dims[0]
1899 # Create the CF-netCDF variable.
-> 1900 cf_var = self._dataset.createVariable(
1901 cf_name,
1902 data.dtype.newbyteorder("="),
1903 element_dims,
1904 fill_value=fill_value,
1905 )
1907 # Add the axis attribute for spatio-temporal CF-netCDF coordinates.
1908 if is_dimcoord:
File ~/miniforge3/envs/core/lib/python3.11/site-packages/iris/fileformats/netcdf/_thread_safe_nc.py:229, in GroupWrapper.createVariable(self, *args, **kwargs)
221 """
222 Calls createVariable() from netCDF4.Group/Dataset within _GLOBAL_NETCDF4_LOCK, returning VariableWrapper.
223
(...)
226 also performed within _GLOBAL_NETCDF4_LOCK.
227 """
228 with _GLOBAL_NETCDF4_LOCK:
--> 229 new_variable = self._contained_instance.createVariable(
230 *args, **kwargs
231 )
232 return VariableWrapper.from_existing(new_variable)
File src/netCDF4/_netCDF4.pyx:2967, in netCDF4._netCDF4.Dataset.createVariable()
File src/netCDF4/_netCDF4.pyx:4149, in netCDF4._netCDF4.Variable.__init__()
TypeError: Illegal primitive data type, must be one of dict_keys(['S1', 'i1', 'u1', 'i2', 'u2', 'i4', 'u4', 'i8', 'u8', 'f4', 'f8']), got bool (variable 'season_membership', group '/')
@scitools/peloton is struggling to agree on this! We don't really like options 1 or 2, for the reasons you already stated.
Would a third option be possible: (3) emit a warning that the result cannot be saved to netCDF, and suggest a workaround for cases where saving is required
(e.g. coord.points = coord.points.astype(int))?