iris-grib
iris-grib copied to clipboard
GRIB file with constant field (all values = 0) throws an error
We have a file where all data is 0. This is encoded as a bitmap in section 6, while section 7 is empty. When reading this into iris we get the error as below. Eccodes is able to read this without problems so it appears that there is no problem with the file. It seems like this error is similar to what was reported in #131. The file is available at the end, although with the ending ".txt" appended to allow it to be uploaded.
import iris
import iris_grib
iris.__version__
'3.8.0.dev21'
iris_grib.__version__
'0.18.0'
cube=iris.load_cube("sd_an_1961092406.grb")
cube.data
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[10], line 1
----> 1 cube.data
File ~/CODE/scitools/iris/lib/iris/cube.py:2466, in Cube.data(self)
2433 @property
2434 def data(self):
2435 """
2436 The :class:`numpy.ndarray` representing the multi-dimensional data of
2437 the cube.
(...)
2464
2465 """
-> 2466 return self._data_manager.data
File ~/CODE/scitools/iris/lib/iris/_data_manager.py:206, in DataManager.data(self)
203 if self.has_lazy_data():
204 try:
205 # Realise the lazy data.
--> 206 result = as_concrete_data(self._lazy_array)
207 # Assign the realised result.
208 self._real_array = result
File ~/CODE/scitools/iris/lib/iris/_lazy_data.py:288, in as_concrete_data(data)
271 """
272 Return the actual content of a lazy array, as a numpy array.
273 If the input data is a NumPy `ndarray` or masked array, return it
(...)
285
286 """
287 if is_lazy_data(data):
--> 288 (data,) = _co_realise_lazy_arrays([data])
290 return data
File ~/CODE/scitools/iris/lib/iris/_lazy_data.py:251, in _co_realise_lazy_arrays(arrays)
236 def _co_realise_lazy_arrays(arrays):
237 """
238 Compute multiple lazy arrays and return a list of real values.
239
(...)
249
250 """
--> 251 computed_arrays = da.compute(*arrays)
252 results = []
253 for lazy_in, real_out in zip(arrays, computed_arrays):
254 # Ensure we always have arrays.
255 # Note : in some cases dask (and numpy) will return a scalar
256 # numpy.int/numpy.float object rather than an ndarray.
257 # Recorded in https://github.com/dask/dask/issues/2111.
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/base.py:599, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
596 keys.append(x.__dask_keys__())
597 postcomputes.append(x.__dask_postcompute__())
--> 599 results = schedule(dsk, keys, **kwargs)
600 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
86 elif isinstance(pool, multiprocessing.pool.Pool):
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
101 with pools_lock:
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
509 _execute_task(task, data) # Re-execute locally
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
513 state["cache"][key] = res
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/local.py:319, in reraise(exc, tb)
317 if exc.__traceback__ is not tb:
318 raise exc.with_traceback(tb)
--> 319 raise exc
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
222 try:
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
226 result = dumps((result, id))
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
115 func, args = arg[0], arg[1:]
116 # Note: Don't assign the subtask results to a variable. numpy detects
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/optimization.py:990, in SubgraphCallable.__call__(self, *args)
988 if not len(args) == len(self.inkeys):
989 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 990 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/core.py:149, in get(dsk, out, cache)
147 for key in toposort(dsk):
148 task = dsk[key]
--> 149 result = _execute_task(task, cache)
150 cache[key] = result
151 result = _execute_task(out, cache)
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
115 func, args = arg[0], arg[1:]
116 # Note: Don't assign the subtask results to a variable. numpy detects
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/utils.py:73, in apply(func, args, kwargs)
42 """Apply a function given its positional and keyword arguments.
43
44 Equivalent to ``func(*args, **kwargs)``
(...)
70 >>> dsk = {'task-name': task} # adds the task to a low level Dask task graph
71 """
72 if kwargs:
---> 73 return func(*args, **kwargs)
74 else:
75 return func(*args)
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/dask/array/core.py:120, in getter(a, b, asarray, lock)
118 lock.acquire()
119 try:
--> 120 c = a[b]
121 # Below we special-case `np.matrix` to force a conversion to
122 # `np.ndarray` and preserve original Dask behavior for `getter`,
123 # as for all purposes `np.matrix` is array-like and thus
124 # `is_arraylike` evaluates to `True` in that case.
125 if asarray and (not is_arraylike(c) or isinstance(c, np.matrix)):
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/iris_grib/message.py:239, in _DataProxy.__getitem__(self, keys)
237 bitmap_section = sections[6]
238 bitmap = self._bitmap(bitmap_section)
--> 239 data = sections[7]['codedValues']
241 if bitmap is not None:
242 # Note that bitmap and data are both 1D arrays at this point.
243 if np.count_nonzero(bitmap) == data.shape[0]:
244 # Only the non-masked values are included in codedValues.
File ~/mambaforge/envs/scitools/lib/python3.11/site-packages/iris_grib/message.py:405, in Section.__getitem__(self, key)
403 value = self._number
404 elif key not in self._keys:
--> 405 raise KeyError('{!r} not defined in section {}'.format(
406 key, self._number))
407 else:
408 value = self._get_key_value(key)
KeyError: "'codedValues' not defined in section 7"
While I am not a GRIB expert, I nevertheless did some further investigations by comparing with the file for the preceding day (attached below). This sheds some light on where the problem might be. grib_dump -O shows for the non-problematic file:
1-4 section5Length = 21 5 numberOfSection = 5 6-9 numberOfValues = 466641 10-11 dataRepresentationTemplateNumber = 0 [Grid point data - simple packing (grib2/tables/17/5.0.table) ] 12-15 referenceValue = 0 16-17 binaryScaleFactor = -31 18-19 decimalScaleFactor = 0 20 bitsPerValue = 24 21 typeOfOriginalFieldValues = 0 [Floating point (grib2/tables/17/5.1.table) ]
and for the problematic file:
1-4 section5Length = 21 5 numberOfSection = 5 6-9 numberOfValues = 466641 10-11 dataRepresentationTemplateNumber = 0 [Grid point data - simple packing (grib2/tables/17/5.0.table) ] 12-15 referenceValue = 0 16-17 binaryScaleFactor = -28 18-19 decimalScaleFactor = 0 20 bitsPerValue = 0 21 typeOfOriginalFieldValues = 0 [Floating point (grib2/tables/17/5.1.table) ]
As far as I understand, if zero bits are used to represent the data then section 7 will be empty, in which case the referenceValue represents the whole field (taking a non-zero decimalScaleFactor into account). This was commented by @greenlaw here.
We ended up monkey-patching this (and a couple other special cases) by updating DataProxy.__getitem__ (originally defined in iris_grib/message.py) as follows. Use at your own risk.
def __getitem__(self, keys):
    """
    GRIB message data accessor.

    This handles reconstruction of the data values from packed
    representations, including some special cases:

    1) reconstruction of a message's data when ``codedValues`` is missing
       entirely (e.g. a constant field encoded with bitsPerValue == 0), and
    2) treating 9999 as missing data when ``missingValueManagementUsed == 1``.

    See:
    https://apps.ecmwf.int/codes/grib/format/grib2/regulations/
    https://apps.ecmwf.int/codes/grib/format/grib2/templates/5/
    """
    # NB. Currently assumes that the validity of this interpretation
    # is checked before this proxy is created.
    message = self.recreate_raw()
    sections = message.sections
    bitmap_section = sections[6]
    bitmap = self._bitmap(bitmap_section)
    if "codedValues" not in sections[7].keys():
        # Section 7 carries no packed values: every grid point is the
        # (decimally scaled) reference value from section 5.  Only the
        # grid-point templates (simple/JPEG2000/PNG/CCSDS/spectral-simple)
        # are supported here.
        data_rep_template = sections[5]["dataRepresentationTemplateNumber"]
        if data_rep_template not in (0, 40, 41, 42, 50):
            raise TranslationError(
                f"Reconstruction of missing codedValues for "
                f"dataRepresentationTemplateNumber {data_rep_template} "
                f"is unsupported"
            )
        reference_value = sections[5]["referenceValue"]
        decimal_scale_factor = sections[5]["decimalScaleFactor"]
        # GRIB2 simple-packing formula Y = (R + X * 2**E) / 10**D with the
        # packed term X absent, so the whole field is R / 10**D.
        data = np.ones(self.shape) * reference_value / (10**decimal_scale_factor)
    else:
        data = sections[7]["codedValues"]
        if bitmap is not None:
            # Note that bitmap and data are both 1D arrays at this point.
            if np.count_nonzero(bitmap) == data.shape[0]:
                # Only the non-masked values are included in codedValues.
                _data = np.empty(shape=bitmap.shape)
                _data[bitmap.astype(bool)] = data
                # `ma.masked_array` masks where input = 1, the opposite of
                # the behaviour specified by the GRIB spec.
                data = ma.masked_array(
                    _data, mask=np.logical_not(bitmap), fill_value=np.nan
                )
            else:
                msg = "Shapes of data and bitmap do not match."
                raise TranslationError(msg)
        elif (
            "missingValueManagementUsed" in sections[5].keys()
            and sections[5]["missingValueManagementUsed"] == 1
        ):
            # This appears to be required for reading certain complex-packing
            # fields.  Whether it is caused by a data encoding problem or a
            # bug in eccodes is unclear.  The relevant eccodes decoder logic
            # can be found here (note that although the module is misnamed as
            # '2order_packing', it is definitely the complex packing logic):
            # https://github.com/ecmwf/eccodes/blob/ac303936267ae99a9b3ae103e7d2db74674098e9/src/grib_accessor_class_data_g22order_packing.cc#L601
            data = ma.masked_array(data, mask=(data == 9999), fill_value=np.nan)
    data = data.reshape(self.shape)
    return data.__getitem__(keys)
@larsbarring and @greenlaw This issue should now be resolved thanks to #362.
This has been included in the 0.19.0 release, which is now available on conda-forge and PyPI.
Care to try it out and confirm whether this fixes your problem?
If so, feel free to close this issue 👍
@bjlittle thanks for taking care of this! Here is my test:
$ ipython
Python 3.11.0 | packaged by conda-forge | (main, Jan 14 2023, 12:27:40) [GCC 11.3.0]
Type 'copyright', 'credits' or 'license' for more information
IPython 8.10.0 -- An enhanced Interactive Python. Type '?' for help.
In [1]: import iris
In [2]: print(iris.__version__)
3.8.0.dev52
In [3]: import iris_grib
/home/a001257/mambaforge/envs/scitools/lib/python3.11/site-packages/gribapi/__init__.py:23: UserWarning: ecCodes 2.31.0 or higher is recommended. You are running version 2.29.0
warnings.warn(
In [4]: print(iris_grib.__version__)
0.19.dev0
In [5]: d1 = iris.load_cube("sd_an_1961092306.grb").data
In [6]: print(d1.min(), d1.max())
0.0 0.0040847319178283215
In [7]: d2 = iris.load_cube("sd_an_1961092406.grb").data
In [8]: print(d2.min(), d2.max())
0.0 0.0
That is, the all-zero file is read without problems, as expected. :+1: :+1: :+1:
However, when looking at the PR (#362) I see that if the bitsPerValue field is 0 then data is filled with zeros. This is of course right for my particular file (and probably most others). However, as @greenlaw notes, the GRIB documentation allows the same type of "packing" whenever all values are the same, be they zeroes or something else. To be on the safe side you might want to change line 254 in message.py to implement the formula from the GRIB documentation, i.e. something similar to what @greenlaw did (above):
reference_value = sections[5]["referenceValue"]
decimal_scale_factor = sections[5]["decimalScaleFactor"]
data = np.ones(self.shape) * reference_value / (10**decimal_scale_factor)
@greenlaw Do you have an example GRIB2 file with a single message that demonstrates this behaviour?
I'm happy to add this extension as a patch i.e., 0.19.1
@bjlittle @larsbarring Sorry, it was a while ago that I wrote that code, and I can't seem to find a file that demonstrates the non-zero missing codedValues behavior. It's possible that eccodes handles this behind the scenes and the formula I included above is unnecessary. If I'm able to find one I will let you know.
No problem @greenlaw, thanks for getting back to let me know :beers:
I guess that files where all values in a field are exactly the same non-zero value are not that easy to come by. Hence I have hacked the test file included in my first post:
# Script: overwrite the referenceValue in section 5 of a GRIB2 file so the
# (empty-section-7) constant field decodes to a non-zero value.
import struct
import os

with open("sd_an_1961092406.grb", mode='rb') as file:
    fileContent = file.read()

# position in file of referenceValue -- derived using grib_dump -O
# (section offsets: indicator + sections 1..4 + 11 bytes into section 5)
refPosition = 16 + 21 + 81 + 34 + 11

# change referenceValue to a "reasonable number",
# I did not bother what it actually meant or how it was packed into grib
# NOTE(review): "f" packs little-endian on x86; GRIB stores big-endian
# IEEE floats, so the value written here comes out garbled -- use ">f"
# to land the intended number (see the follow-up snippet below).
ref = struct.pack("f", 1.9999)
new = fileContent[0:refPosition] + ref + fileContent[refPosition + 4:]

with open("sd_an_1961092406__NEW.grb", mode='wb') as file:
    file.write(new)

print("\nCheck that the new value landed where is supposed to (actual value is garbled):")
os.system("grib_dump -O sd_an_1961092406__NEW.grb | grep referenceValue")

print("\n\n\nAnd print how eccodes sees the data:")
os.system("grib_dump sd_an_1961092406__NEW.grb | tail -n 36")

import iris
import iris_grib
print(iris.__version__)
print(iris_grib.__version__)

cube = iris.load_cube("sd_an_1961092406__NEW.grb")
print(f"\n\nCUBE min = {cube.data.min()}, and max = {cube.data.max()}")
results in this printout:
Check that the new value landed where is supposed to (actual value is garbled):
12-15 referenceValue = -0.000482554
And print how eccodes sees the data:
# A bit map applies to this product and is specified in this Section (grib2/tables/17/6.0.table)
bitMapIndicator = 0;
bitmapPresent = 1;
values(466641) = {
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554,
-0.000482554, -0.000482554, -0.000482554, -0.000482554, -0.000482554
... 466541 more values
}
#-READ ONLY- maximum = -0.000482554;
#-READ ONLY- minimum = -0.000482554;
#-READ ONLY- average = -0.000482554;
#-READ ONLY- standardDeviation = 0;
#-READ ONLY- skewness = 0;
#-READ ONLY- kurtosis = 0;
#-READ ONLY- isConstant = 1;
#-READ ONLY- numberOfMissing = 0;
#-READ ONLY- getNumberOfValues = 466641;
}
/home/a001257/mambaforge/envs/scitools/lib/python3.11/site-packages/gribapi/__init__.py:23: UserWarning: ecCodes 2.31.0 or higher is recommended. You are running version 2.29.0
warnings.warn(
3.8.0.dev52
0.19.dev0
CUBE min = 0.0, and max = 0.0
Thanks very much @larsbarring! Resource permitting, we now have all we need to work on this
@trexfeathers -- thanks
and if you change my code as follows the numbers becomes as one expects:
# put a "reasonable number" -- ">f" is big-endian, matching GRIB's byte order
ref = struct.pack(">f", 1.999)
# also zero out binaryScaleFactor (2 bytes; 0 is byte-order independent);
# renamed from `bin` to avoid shadowing the builtin of that name
bsf_bytes = struct.pack("h", 0)
new = fileContent[0:refPosition] + ref + bsf_bytes + fileContent[refPosition + 6:]
where the grib-dump output now shows 1.999
.... and now I happened to bounce into #265 ....