VirtualiZarr icon indicating copy to clipboard operation
VirtualiZarr copied to clipboard

Xarray assertions can trigger loading

Open TomNicholas opened this issue 1 year ago • 0 comments

This should not happen. Instead, either attempting to load should trigger a clear NotImplementedError, or perhaps in this case xarray's assert functions should dispatch to virtualizarr's equals method instead of trying to coerce to a numpy array.

    def test_lithops(self, netcdf4_files_factory):
        # by default this will use the lithops LocalHost executor
    
        filepath1, filepath2 = netcdf4_files_factory()
    
        # test combine nested without in-memory indexes
        combined_vds = open_virtual_mfdataset(
            [filepath1, filepath2],
            combine="nested",
            concat_dim="time",
            coords="minimal",
            compat="override",
            indexes={},
            parallel="lithops",
        )
        vds1 = open_virtual_dataset(filepath1, indexes={})
        vds2 = open_virtual_dataset(filepath2, indexes={})
        expected_vds = xr.concat(
            [vds1, vds2], dim="time", coords="minimal", compat="override"
        )
    
>       xrt.assert_identical(combined_vds.coords['lat'], expected_vds.coords['lat'])

virtualizarr/tests/test_backend.py:508: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/formatting.py:973: in diff_array_repr
    temp = [wrap_indent(short_array_repr(obj), start="    ") for obj in (a, b)]
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/formatting.py:973: in <listcomp>
    temp = [wrap_indent(short_array_repr(obj), start="    ") for obj in (a, b)]
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/formatting.py:634: in short_array_repr
    array = to_duck_array(array)
/Users/tom/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/namedarray/pycompat.py:129: in to_duck_array
    chunkmanager = get_chunked_array_type(data)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

args = (ManifestArray<shape=(25,), dtype=float32, chunks=(25,)>,), chunked_arrays = [ManifestArray<shape=(25,), dtype=float32, chunks=(25,)>]
chunked_array_types = {<class 'virtualizarr.manifests.array.ManifestArray'>}
chunkmanagers = {'cubed': <cubed_xarray.cubedmanager.CubedManager object at 0x12db73450>, 'dask': <xarray.namedarray.daskmanager.DaskManager object at 0x12cc32990>}

    def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]:
        """
        Detects which parallel backend should be used for given set of arrays.
    
        Also checks that all arrays are of same chunking type (i.e. not a mix of cubed and dask).
        """
    
        # TODO this list is probably redundant with something inside xarray.apply_ufunc
        ALLOWED_NON_CHUNKED_TYPES = {int, float, np.ndarray}
    
        chunked_arrays = [
            a
            for a in args
            if is_chunked_array(a) and type(a) not in ALLOWED_NON_CHUNKED_TYPES
        ]
    
        # Asserts all arrays are the same type (or numpy etc.)
        chunked_array_types = {type(a) for a in chunked_arrays}
        if len(chunked_array_types) > 1:
            raise TypeError(
                f"Mixing chunked array types is not supported, but received multiple types: {chunked_array_types}"
            )
        elif len(chunked_array_types) == 0:
            raise TypeError("Expected a chunked array but none were found")
    
        # iterate over defined chunk managers, seeing if each recognises this array type
        chunked_arr = chunked_arrays[0]
        chunkmanagers = list_chunkmanagers()
        selected = [
            chunkmanager
            for chunkmanager in chunkmanagers.values()
            if chunkmanager.is_chunked_array(chunked_arr)
        ]
        if not selected:
>           raise TypeError(
                f"Could not find a Chunk Manager which recognises type {type(chunked_arr)}"
E               TypeError: Could not find a Chunk Manager which recognises type <class 'virtualizarr.manifests.array.ManifestArray'>

TomNicholas avatar Dec 17 '24 16:12 TomNicholas