ValueError: conflicting sizes for dimension in xr.open_dataset("reference://"...) vs. no error in xr.open_dataset(direct_file_path) for h5
What is your issue?
Hi all, on a project I am attempting a dataset read using the xarray JSON reference system. Metadata for this file (an ATL03 h5 file) can be found here: https://nsidc.org/sites/default/files/icesat2_atl03_data_dict_v005.pdf
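For context, a reference JSON like the one passed as JSON_PATH below is typically generated with kerchunk. A minimal sketch, assuming kerchunk's SingleHdf5ToZarr and illustrative paths (this may differ from how the attached reference was actually produced):

import json
import fsspec
from kerchunk.hdf import SingleHdf5ToZarr

# Illustrative paths; the granule name matches the file used further below.
h5_path = "ATL03_20230816235231_08822014_006_01.h5"
with fsspec.open(h5_path, "rb") as f:
    refs = SingleHdf5ToZarr(f, h5_path).translate()

with open("ATL03_REF_NONUTM.json", "w") as out:
    json.dump(refs, out)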
- Reading a group whose variables have 2 dimensions or fewer produces no issues, e.g. the group "gt1l/heights" (documented as /gtx/heights in the PDF):
ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs={
    "consolidated": False,
    "storage_options": {"fo": JSON_PATH},
    "group": "gt1l/heights"
})
- Reading a group with a variable that has 3+ dimensions causes the error below. The group "ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l" contains the variable rad_corr, which has 3 dimensions:
ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs={
    "consolidated": False,
    "storage_options": {"fo": JSON_PATH},
    "group": "ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l"
})
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 1
----> 1 ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs={
      2     "consolidated": False,
      3     "storage_options": {"fo": JSON_PATH},
      4     "group": group_path
      5 })

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/backends/api.py:539, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
    527 decoders = _resolve_decoders_kwargs(
    528     decode_cf,
    529     open_backend_dataset_parameters=backend.open_dataset_parameters,
    (...)
    535     decode_coords=decode_coords,
    536 )
    538 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 539 backend_ds = backend.open_dataset(
    540     filename_or_obj,
    541     drop_variables=drop_variables,
    542     **decoders,
    543     **kwargs,
    544 )
    545 ds = _dataset_from_backend_dataset(
    546     backend_ds,
    547     filename_or_obj,
    (...)
    555     **kwargs,
    556 )
    557 return ds

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/backends/zarr.py:862, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel)
    860 store_entrypoint = StoreBackendEntrypoint()
    861 with close_on_error(store):
--> 862     ds = store_entrypoint.open_dataset(
    863         store,
    864         mask_and_scale=mask_and_scale,
    865         decode_times=decode_times,
    866         concat_characters=concat_characters,
    867         decode_coords=decode_coords,
    868         drop_variables=drop_variables,
    869         use_cftime=use_cftime,
    870         decode_timedelta=decode_timedelta,
    871     )
    872 return ds

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/backends/store.py:43, in StoreBackendEntrypoint.open_dataset(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
     29 encoding = store.get_encoding()
     31 vars, attrs, coord_names = conventions.decode_cf_variables(
     32     vars,
     33     attrs,
    (...)
     40     decode_timedelta=decode_timedelta,
     41 )
---> 43 ds = Dataset(vars, attrs=attrs)
     44 ds = ds.set_coords(coord_names.intersection(vars))
     45 ds.set_close(store.close)

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/dataset.py:604, in Dataset.__init__(self, data_vars, coords, attrs)
    601 if isinstance(coords, Dataset):
    602     coords = coords.variables
--> 604 variables, coord_names, dims, indexes, _ = merge_data_and_coords(
    605     data_vars, coords, compat="broadcast_equals"
    606 )
    608 self._attrs = dict(attrs) if attrs is not None else None
    609 self._close = None

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/merge.py:575, in merge_data_and_coords(data_vars, coords, compat, join)
    573 objects = [data_vars, coords]
    574 explicit_coords = coords.keys()
--> 575 return merge_core(
    576     objects,
    577     compat,
    578     join,
    579     explicit_coords=explicit_coords,
    580     indexes=Indexes(indexes, coords),
    581 )

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/merge.py:761, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)
    756 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
    757 variables, out_indexes = merge_collected(
    758     collected, prioritized, compat=compat, combine_attrs=combine_attrs
    759 )
--> 761 dims = calculate_dimensions(variables)
    763 coord_names, noncoord_names = determine_coords(coerced)
    764 if explicit_coords is not None:

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/variable.py:3208, in calculate_dimensions(variables)
   3206     last_used[dim] = k
   3207 elif dims[dim] != size:
-> 3208     raise ValueError(
   3209         f"conflicting sizes for dimension {dim!r}: "
   3210         f"length {size} on {k!r} and length {dims[dim]} on {last_used!r}"
   3211     )
   3212 return dims

ValueError: conflicting sizes for dimension 'phony_dim_1': length 498 on 'width' and length 160 on {'phony_dim_0': 'dead_time', 'phony_dim_1': 'rad_corr', 'phony_dim_2': 'rad_corr'}
- Now, contrast this with reading the same group, 3+ dimension variable included, via the direct h5 file path. This does not produce an error:
ds = xr.open_dataset("/Users/katrinasharonin/Downloads/ATL03_20230816235231_08822014_006_01.h5", group="ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l")
The JSON reference file ATL03_REF_NONUTM.json is attached for reference. Judging from the error message above, the reference appears to reuse phony_dim_1 for axes of different lengths within the same group (498 on 'width' vs. 160 on 'rad_corr'), whereas the direct HDF5 read does not run into this.
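To confirm, here is a short diagnostic sketch, assuming a version-1 kerchunk reference with a top-level "refs" mapping (the file name matches the attachment); it prints the _ARRAY_DIMENSIONS recorded for the failing group:

import json

with open("ATL03_REF_NONUTM.json") as f:
    refs = json.load(f)["refs"]

group = "ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l"
for key, value in refs.items():
    # Each array's .zattrs holds the _ARRAY_DIMENSIONS that the zarr backend
    # maps to dimension names; a conflict shows up as the same phony_dim_*
    # name attached to axes of different lengths.
    if key.startswith(group) and key.endswith(".zattrs"):
        print(key, json.loads(value).get("_ARRAY_DIMENSIONS"))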
I am also seeing this when trying to open a multiscale OME-Zarr dataset, since the scale arrays all reuse the same dimension names with different sizes.
File "C:\Users\cameron.arshadi\repos\aind-morphology-utils\src\aind_morphology_utils\movie_maker.py", line 592, in <module>
main()
File "C:\Users\cameron.arshadi\repos\aind-morphology-utils\src\aind_morphology_utils\movie_maker.py", line 572, in main
ds = xarray.open_zarr(config.zarr_path, chunks=None, consolidated=False)
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\backends\zarr.py", line 825, in open_zarr
ds = open_dataset(
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\backends\api.py", line 541, in open_dataset
backend_ds = backend.open_dataset(
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\backends\zarr.py", line 903, in open_dataset
ds = store_entrypoint.open_dataset(
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\backends\store.py", line 47, in open_dataset
ds = Dataset(vars, attrs=attrs)
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\core\dataset.py", line 613, in __init__
variables, coord_names, dims, indexes, _ = merge_data_and_coords(
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\core\merge.py", line 575, in merge_data_and_coords
return merge_core(
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\core\merge.py", line 761, in merge_core
dims = calculate_dimensions(variables)
File "C:\Users\cameron.arshadi\AppData\Local\miniconda3\envs\amu\lib\site-packages\xarray\core\variable.py", line 3216, in calculate_dimensions
raise ValueError(
ValueError: conflicting sizes for dimension 'z': length 14421 on '1' and length 28842 on {'t': '0', 'c': '0', 'z': '0', 'y': '0', 'x': '0'}
Not an expert here, but presumably if a file uses the same name for differently sized dimensions, xarray isn't going to be able to open it?
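That matches both errors above; a minimal sketch reproducing the failure with hypothetical arrays (nothing from the original files):

import numpy as np
import xarray as xr

# Two variables that reuse the dimension name "z" with different sizes;
# constructing a Dataset from them triggers the same check in
# calculate_dimensions and raises ValueError.
a = xr.DataArray(np.zeros(4), dims=["z"])
b = xr.DataArray(np.zeros(8), dims=["z"])
xr.Dataset({"a": a, "b": b})  # ValueError: conflicting sizes for dimension 'z'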
One workaround may be to open the DataArrays separately, rename their dimensions, and then combine them into a Dataset. But I'm not confident.
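A minimal sketch of that idea for the OME-Zarr case; the store path, level names, and the per-level renaming scheme are all assumptions:

import xarray as xr

# Hypothetical store and scale-level names following the layout in the
# traceback above ("0", "1", ... sharing dims t/c/z/y/x).
store = "image.zarr"
levels = ["0", "1"]

datasets = []
for name in levels:
    # Drop the other levels so each open sees only one array; this
    # sidesteps the conflicting-sizes check at Dataset construction time.
    others = [lvl for lvl in levels if lvl != name]
    ds = xr.open_zarr(store, consolidated=False, drop_variables=others)
    # Rename dims so each level's t/c/z/y/x no longer collide.
    ds = ds.rename({dim: f"{dim}_{name}" for dim in ds[name].dims})
    datasets.append(ds)

combined = xr.merge(datasets)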
Does anyone have other thoughts or should we close?