Writing empty observations, parameters or responses to storage causes subsequent reading to fail
The following test fails:
```python
from ert.config import EnkfObs
from ert.config.enkf_observation_implementation_type import (
    EnkfObservationImplementationType,
)
from ert.config.observation_vector import ObsVector
from ert.storage import open_storage


def test_failure(tmpdir):
    storage = open_storage(tmpdir, "w")
    experiment = storage.create_experiment(
        [],
        observations=EnkfObs(
            obs_vectors={
                "A": ObsVector(
                    observation_type=EnkfObservationImplementationType.SUMMARY_OBS,
                    observation_key="A",
                    data_key="A",
                    observations={},
                )
            },
            obs_time=[],
        ).datasets,
    )
    obs = storage.get_experiment(experiment.id).observations
```
It fails with the following traceback:
```
src/ert/storage/local_experiment.py:134: in observations
    return {
src/ert/storage/local_experiment.py:135: in <dictcomp>
    observation.name: xr.open_dataset(observation, engine="scipy")
.../python3.11/site-packages/xarray/backends/api.py:573: in open_dataset
    backend_ds = backend.open_dataset(
.../python3.11/site-packages/xarray/backends/scipy_.py:315: in open_dataset
    ds = store_entrypoint.open_dataset(
.../python3.11/site-packages/xarray/backends/store.py:43: in open_dataset
    vars, attrs = filename_or_obj.load()
.../python3.11/site-packages/xarray/backends/common.py:210: in load
    (_decode_variable_name(k), v) for k, v in self.get_variables().items()
.../python3.11/site-packages/xarray/backends/scipy_.py:181: in get_variables
    (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
.../python3.11/site-packages/xarray/backends/scipy_.py:170: in ds
    return self._manager.acquire()
.../python3.11/site-packages/xarray/backends/file_manager.py:193: in acquire
    file, _ = self._acquire_with_cache_info(needs_lock)
.../python3.11/site-packages/xarray/backends/file_manager.py:217: in _acquire_with_cache_info
    file = self._opener(*self._args, **kwargs)
.../python3.11/site-packages/xarray/backends/scipy_.py:109: in _open_scipy_netcdf
    return scipy.io.netcdf_file(filename, mode=mode, mmap=mmap, version=version)
.../python3.11/site-packages/scipy/io/_netcdf.py:278: in __init__
    self._read()
.../python3.11/site-packages/scipy/io/_netcdf.py:607: in _read
    self._read_var_array()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <scipy.io._netcdf.netcdf_file object at 0x7fddacb1eed0>

    def _read_var_array(self):
        header = self.fp.read(4)
        if header not in [ZERO, NC_VARIABLE]:
            raise ValueError("Unexpected header.")
        begin = 0
        dtypes = {'names': [], 'formats': []}
        rec_vars = []
        count = self._unpack_int()
        for var in range(count):
            (name, dimensions, shape, attributes,
             typecode, size, dtype_, begin_, vsize) = self._read_var()
            # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html
            # Note that vsize is the product of the dimension lengths
            # (omitting the record dimension) and the number of bytes
            # per value (determined from the type), increased to the
            # next multiple of 4, for each variable. If a record
            # variable, this is the amount of space per record. The
            # netCDF "record size" is calculated as the sum of the
            # vsize's of all the record variables.
            #
            # The vsize field is actually redundant, because its value
            # may be computed from other information in the header. The
            # 32-bit vsize field is not large enough to contain the size
            # of variables that require more than 2^32 - 4 bytes, so
            # 2^32 - 1 is used in the vsize field for such variables.
            if shape and shape[0] is None:  # record variable
                rec_vars.append(name)
                # The netCDF "record size" is calculated as the sum of
                # the vsize's of all the record variables.
                self.__dict__['_recsize'] += vsize
                if begin == 0:
                    begin = begin_
                dtypes['names'].append(name)
                dtypes['formats'].append(str(shape[1:]) + dtype_)
                # Handle padding with a virtual variable.
                if typecode in 'bch':
                    actual_size = reduce(mul, (1,) + shape[1:]) * size
                    padding = -actual_size % 4
                    if padding:
                        dtypes['names'].append('_padding_%d' % var)
                        dtypes['formats'].append('(%d,)>b' % padding)
                # Data will be set later.
                data = None
            else:  # not a record variable
                # Calculate size to avoid problems with vsize (above)
>               a_size = reduce(mul, shape, 1) * size
E               TypeError: unsupported operand type(s) for *: 'int' and 'NoneType'
```
This is a bug in xarray and can be reproduced with:
```python
import numpy as np
import xarray as xr
from datetime import datetime

ds = (
    xr.Dataset(
        {
            "values": (
                ["name", "time"],
                np.array([[]], dtype=np.float32).T,
            )
        },
        coords={"time": [datetime(1996, 1, 3, 0, 0)], "name": []},
    )
    .drop_duplicates("time")
    .expand_dims({"index": [0]})
)
ds.to_netcdf("file.nc", engine="scipy")
_ = xr.open_dataset("file.nc", engine="scipy")
```
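Here the write itself succeeds; it is the subsequent open_dataset call that raises the same TypeError as in the traceback above.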
It is not known whether other cases with non-empty arrays are affected.
Upstream bug report here: https://github.com/pydata/xarray/issues/8693
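One way to guard against writing such files at all is to check for zero-sized variables before writing. The following is only a sketch of that idea, not the fix used in ERT; the helper name and its placement are assumptions:

```python
import xarray as xr


def assert_no_empty_variables(ds: xr.Dataset) -> None:
    # Hypothetical guard (not the actual ERT fix): refuse to write a dataset
    # containing zero-sized variables, since such a file cannot be read back
    # with engine="scipy" (see the upstream issue above).
    empty = [name for name, var in ds.variables.items() if var.size == 0]
    if empty:
        raise ValueError(
            f"Dataset has empty variables {empty}; writing it with "
            "engine='scipy' would make it unreadable."
        )
```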
The bug could be triggered by setting SUMMARY FOPR in the config file while the summary file generated by the forward model did not contain FOPR.
It could also be triggered by setting ECLBASE in the config file but forgetting to add SUMMARY, which would create a SummaryConfig with an empty key list.
This would cause subsequent reads of that ensemble to fail. To stop this exact case from happening, the following check was added:
https://github.com/eivindjahren/ert/blob/8397bc2e22b1fb3c2fecd28bd94051de5a42a280/src/ert/config/summary_config.py#L33-L38
This turns such situations into realization failures.
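For illustration, a minimal sketch of that kind of check, assuming hypothetical names (requested_keys, available_keys); it is not the code at the linked lines:

```python
def check_summary_response(requested_keys: set[str], available_keys: set[str]) -> None:
    # Hypothetical check: if none of the summary keys requested in the config
    # are present in the summary data produced by the forward model, fail the
    # realization instead of writing an empty response dataset to storage.
    if not requested_keys & available_keys:
        raise ValueError(
            f"None of the requested summary keys {sorted(requested_keys)} "
            "were found in the summary data; failing this realization."
        )
```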
Can be reproduced by running `ert ensemble_experiment config.ert` on the following input.

config.ert:
```
NUM_REALIZATIONS 10
GEN_KW BPR template.txt params.txt BPR.txt
```

params.txt:
```
BPR UNIFORM 0.1 0.5
```

template.txt:
```
BPR:<BPR>
```

and an empty BPR.txt.
This example is a bit misleading, as params.txt contains what would normally be the content of BPR.txt, and that content is something we should definitely validate.
I will close this as intended behavior, but have added an issue for validation: #7937