VirtualiZarr
VirtualiZarr copied to clipboard
Xarray assertions can trigger loading
This should not happen. Instead, either attempting to load should raise a clear NotImplementedError, or, perhaps in this case, xarray's assert functions should dispatch to VirtualiZarr's equals method rather than trying to coerce the data to a numpy array.
def test_lithops(self, netcdf4_files_factory):
# by default this will use the lithops LocalHost executor
filepath1, filepath2 = netcdf4_files_factory()
# test combine nested without in-memory indexes
combined_vds = open_virtual_mfdataset(
[filepath1, filepath2],
combine="nested",
concat_dim="time",
coords="minimal",
compat="override",
indexes={},
parallel="lithops",
)
vds1 = open_virtual_dataset(filepath1, indexes={})
vds2 = open_virtual_dataset(filepath2, indexes={})
expected_vds = xr.concat(
[vds1, vds2], dim="time", coords="minimal", compat="override"
)
> xrt.assert_identical(combined_vds.coords['lat'], expected_vds.coords['lat'])
virtualizarr/tests/test_backend.py:508:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/formatting.py:973: in diff_array_repr
temp = [wrap_indent(short_array_repr(obj), start=" ") for obj in (a, b)]
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/formatting.py:973: in <listcomp>
temp = [wrap_indent(short_array_repr(obj), start=" ") for obj in (a, b)]
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/formatting.py:634: in short_array_repr
array = to_duck_array(array)
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/namedarray/pycompat.py:129: in to_duck_array
chunkmanager = get_chunked_array_type(data)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
args = (ManifestArray<shape=(25,), dtype=float32, chunks=(25,)>,), chunked_arrays = [ManifestArray<shape=(25,), dtype=float32, chunks=(25,)>]
chunked_array_types = {<class 'virtualizarr.manifests.array.ManifestArray'>}
chunkmanagers = {'cubed': <cubed_xarray.cubedmanager.CubedManager object at 0x12db73450>, 'dask': <xarray.namedarray.daskmanager.DaskManager object at 0x12cc32990>}
def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]:
"""
Detects which parallel backend should be used for given set of arrays.
Also checks that all arrays are of same chunking type (i.e. not a mix of cubed and dask).
"""
# TODO this list is probably redundant with something inside xarray.apply_ufunc
ALLOWED_NON_CHUNKED_TYPES = {int, float, np.ndarray}
chunked_arrays = [
a
for a in args
if is_chunked_array(a) and type(a) not in ALLOWED_NON_CHUNKED_TYPES
]
# Asserts all arrays are the same type (or numpy etc.)
chunked_array_types = {type(a) for a in chunked_arrays}
if len(chunked_array_types) > 1:
raise TypeError(
f"Mixing chunked array types is not supported, but received multiple types: {chunked_array_types}"
)
elif len(chunked_array_types) == 0:
raise TypeError("Expected a chunked array but none were found")
# iterate over defined chunk managers, seeing if each recognises this array type
chunked_arr = chunked_arrays[0]
chunkmanagers = list_chunkmanagers()
selected = [
chunkmanager
for chunkmanager in chunkmanagers.values()
if chunkmanager.is_chunked_array(chunked_arr)
]
if not selected:
> raise TypeError(
f"Could not find a Chunk Manager which recognises type {type(chunked_arr)}"
E TypeError: Could not find a Chunk Manager which recognises type <class 'virtualizarr.manifests.array.ManifestArray'>