xarray
xarray copied to clipboard
netCDF encoding and decoding issues.
What happened?
Reading or writing netCDF variables containing scale_factor and/or fill_value might raise the following error:
UFuncTypeError: Cannot cast ufunc 'multiply' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'
This problem might be related to the following changes: #7654.
What did you expect to happen?
I'm expecting it to work like it did before xarray 2024.03.0!
Minimal Complete Verifiable Example
# Example 1, decoding problem.
import netCDF4 as nc
import numpy as np
import xarray as xr
with nc.Dataset("test1.nc", mode="w") as ncds:
ncds.createDimension(dimname="d")
ncx = ncds.createVariable(
varname="x",
datatype=np.int64,
dimensions=("d",),
fill_value=-1,
)
ncx.scale_factor = 1e-3
ncx.units = "seconds"
ncx[:] = np.array([0.001, 0.002, 0.003])
# This will raise the error
xr.load_dataset("test1.nc")
# Example 2, encoding problem.
import netCDF4 as nc
import numpy as np
import xarray as xr
with nc.Dataset("test2.nc", mode="w") as ncds:
ncds.createDimension(dimname="d")
ncx = ncds.createVariable(varname="x", datatype=np.int8, dimensions=("d",))
ncx.scale_factor = 1000
ncx[:] = np.array([1000, 2000, 3000])
# Reading it does work
data = xr.load_dataset("test2.nc")
# Writing read data does not work
data.to_netcdf("text2x.nc")
MVCE confirmation
- [X] Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
- [X] Complete example — the example is self-contained, including all data and the text of any traceback.
- [X] Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
- [X] New issue — a search of GitHub Issues suggests this is not a duplicate.
- [X] Recent environment — the issue occurs with the latest version of xarray and its dependencies.
Relevant log output
# Example 1 error
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
Cell In[38], line 1
----> 1 xr.load_dataset("test2.nc")
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/backends/api.py:280, in load_dataset(filename_or_obj, **kwargs)
277 raise TypeError("cache has no effect in this context")
279 with open_dataset(filename_or_obj, **kwargs) as ds:
--> 280 return ds.load()
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/dataset.py:855, in Dataset.load(self, **kwargs)
853 for k, v in self.variables.items():
854 if k not in lazy_data:
--> 855 v.load()
857 return self
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/variable.py:961, in Variable.load(self, **kwargs)
944 def load(self, **kwargs):
945 """Manually trigger loading of this variable's data from disk or a
946 remote source into memory and return this variable.
947
(...)
959 dask.array.compute
960 """
--> 961 self._data = to_duck_array(self._data, **kwargs)
962 return self
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/namedarray/pycompat.py:134, in to_duck_array(data, **kwargs)
131 return loaded_data
133 if isinstance(data, ExplicitlyIndexed):
--> 134 return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return]
135 elif is_duck_array(data):
136 return data
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/indexing.py:809, in MemoryCachedArray.get_duck_array(self)
808 def get_duck_array(self):
--> 809 self._ensure_cached()
810 return self.array.get_duck_array()
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/indexing.py:803, in MemoryCachedArray._ensure_cached(self)
802 def _ensure_cached(self):
--> 803 self.array = as_indexable(self.array.get_duck_array())
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/indexing.py:760, in CopyOnWriteArray.get_duck_array(self)
759 def get_duck_array(self):
--> 760 return self.array.get_duck_array()
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/indexing.py:630, in LazilyIndexedArray.get_duck_array(self)
625 # self.array[self.key] is now a numpy array when
626 # self.array is a BackendArray subclass
627 # and self.key is BasicIndexer((slice(None, None, None),))
628 # so we need the explicit check for ExplicitlyIndexed
629 if isinstance(array, ExplicitlyIndexed):
--> 630 array = array.get_duck_array()
631 return _wrap_numpy_scalars(array)
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/coding/variables.py:81, in _ElementwiseFunctionArray.get_duck_array(self)
80 def get_duck_array(self):
---> 81 return self.func(self.array.get_duck_array())
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/coding/variables.py:81, in _ElementwiseFunctionArray.get_duck_array(self)
80 def get_duck_array(self):
---> 81 return self.func(self.array.get_duck_array())
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/coding/variables.py:399, in _scale_offset_decoding(data, scale_factor, add_offset, dtype)
397 data = data.astype(dtype=dtype, copy=True)
398 if scale_factor is not None:
--> 399 data *= scale_factor
400 if add_offset is not None:
401 data += add_offset
UFuncTypeError: Cannot cast ufunc 'multiply' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'
# Example 2 error
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
Cell In[42], line 1
----> 1 data.to_netcdf("text1x.nc")
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/core/dataset.py:2298, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
2295 encoding = {}
2296 from xarray.backends.api import to_netcdf
-> 2298 return to_netcdf( # type: ignore # mypy cannot resolve the overloads:(
2299 self,
2300 path,
2301 mode=mode,
2302 format=format,
2303 group=group,
2304 engine=engine,
2305 encoding=encoding,
2306 unlimited_dims=unlimited_dims,
2307 compute=compute,
2308 multifile=False,
2309 invalid_netcdf=invalid_netcdf,
2310 )
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/backends/api.py:1339, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
1334 # TODO: figure out how to refactor this logic (here and in save_mfdataset)
1335 # to avoid this mess of conditionals
1336 try:
1337 # TODO: allow this work (setting up the file for writing array data)
1338 # to be parallelized with dask
-> 1339 dump_to_store(
1340 dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
1341 )
1342 if autoclose:
1343 store.close()
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/backends/api.py:1386, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
1383 if encoder:
1384 variables, attrs = encoder(variables, attrs)
-> 1386 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/backends/common.py:393, in AbstractWritableDataStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
390 if writer is None:
391 writer = ArrayWriter()
--> 393 variables, attributes = self.encode(variables, attributes)
395 self.set_attributes(attributes)
396 self.set_dimensions(variables, unlimited_dims=unlimited_dims)
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/backends/common.py:482, in WritableCFDataStore.encode(self, variables, attributes)
479 def encode(self, variables, attributes):
480 # All NetCDF files get CF encoded by default, without this attempting
481 # to write times, for example, would fail.
--> 482 variables, attributes = cf_encoder(variables, attributes)
483 variables = {k: self.encode_variable(v) for k, v in variables.items()}
484 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/conventions.py:795, in cf_encoder(variables, attributes)
792 # add encoding for time bounds variables if present.
793 _update_bounds_encoding(variables)
--> 795 new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
797 # Remove attrs from bounds variables (issue #2921)
798 for var in new_vars.values():
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/conventions.py:196, in encode_cf_variable(var, needs_copy, name)
183 ensure_not_multiindex(var, name=name)
185 for coder in [
186 times.CFDatetimeCoder(),
187 times.CFTimedeltaCoder(),
(...)
194 variables.BooleanCoder(),
195 ]:
--> 196 var = coder.encode(var, name=name)
198 # TODO(kmuehlbauer): check if ensure_dtype_not_object can be moved to backends:
199 var = ensure_dtype_not_object(var, name=name)
File .../conda/envs/ong312_local/lib/python3.12/site-packages/xarray/coding/variables.py:476, in CFScaleOffsetCoder.encode(self, variable, name)
474 data -= pop_to(encoding, attrs, "add_offset", name=name)
475 if "scale_factor" in encoding:
--> 476 data /= pop_to(encoding, attrs, "scale_factor", name=name)
478 return Variable(dims, data, attrs, encoding, fastpath=True)
UFuncTypeError: Cannot cast ufunc 'divide' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'
Anything else we need to know?
No response
Environment
INSTALLED VERSIONS
commit: None python: 3.12.3 | packaged by conda-forge | (main, Apr 15 2024, 18:38:13) [GCC 12.3.0] python-bits: 64 OS: Linux OS-release: 5.15.0-92-generic machine: x86_64 processor: x86_64 byteorder: little LC_ALL: None LANG: fr_FR.UTF-8 LOCALE: ('fr_FR', 'UTF-8') libhdf5: 1.14.3 libnetcdf: 4.9.2
xarray: 2024.3.0 pandas: 2.2.2 numpy: 1.26.4 scipy: 1.13.0 netCDF4: 1.6.5 pydap: None h5netcdf: 1.3.0 h5py: 3.11.0 Nio: None zarr: 2.17.2 cftime: 1.6.3 nc_time_axis: None iris: None bottleneck: None dask: 2024.4.1 distributed: 2024.4.1 matplotlib: 3.8.4 cartopy: 0.23.0 seaborn: None numbagg: None fsspec: 2024.3.1 cupy: None pint: 0.23 sparse: None flox: None numpy_groupies: None setuptools: 69.5.1 pip: 24.0 conda: 24.3.0 pytest: 8.1.1 mypy: 1.9.0 IPython: 8.22.2 sphinx: 7.3.5