Not able to open files with a colon in the filename
Hi! I really like your package and would like to use it for my research including WRF output data. Generally, these come without suffixes and the filenames contain colons. I found a recent issue on file suffixes (https://github.com/zarr-developers/VirtualiZarr/issues/582), but nothing on colons in filepaths. Unfortunately, this seems to be an issue, too. Here are my findings after adding a '.nc' to a filename:
from virtualizarr import open_virtual_dataset
open_virtual_dataset('wrfout_d01_2024-01-02_01:00:00.nc')
Error:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[11], line 2
1 from virtualizarr import open_virtual_dataset
----> 2 open_virtual_dataset('wrfout_d01_2024-01-02_01:00:00')
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/virtualizarr/backend.py:199, in open_virtual_dataset(filepath, filetype, group, drop_variables, loadable_variables, decode_times, cftime_variables, indexes, virtual_array_class, virtual_backend_kwargs, reader_options, backend)
196 if backend_cls is None:
197 raise NotImplementedError(f"Unsupported file type: {filetype.name}")
--> 199 vds = backend_cls.open_virtual_dataset(
200 filepath,
201 group=group,
202 drop_variables=drop_variables,
203 loadable_variables=loadable_variables,
204 decode_times=decode_times,
205 indexes=indexes,
206 virtual_backend_kwargs=virtual_backend_kwargs,
207 reader_options=reader_options,
208 )
210 return vds
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/virtualizarr/readers/hdf/hdf.py:86, in HDFVirtualBackend.open_virtual_dataset(filepath, group, drop_variables, loadable_variables, decode_times, indexes, virtual_backend_kwargs, reader_options)
77 drop_variables, loadable_variables = check_for_collisions(
78 drop_variables,
79 loadable_variables,
80 )
82 filepath = validate_and_normalize_path_to_uri(
83 filepath, fs_root=Path.cwd().as_uri()
84 )
---> 86 virtual_vars = HDFVirtualBackend._virtual_vars_from_hdf(
87 path=filepath,
88 group=group,
89 drop_variables=drop_variables + loadable_variables,
90 reader_options=reader_options,
91 )
93 loadable_vars, indexes = maybe_open_loadable_vars_and_indexes(
94 filepath,
95 loadable_variables=loadable_variables,
(...) 100 decode_times=decode_times,
101 )
103 attrs = HDFVirtualBackend._get_group_attrs(
104 path=filepath, reader_options=reader_options, group=group
105 )
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/virtualizarr/readers/hdf/hdf.py:399, in HDFVirtualBackend._virtual_vars_from_hdf(path, group, drop_variables, reader_options)
394 if drop_variables is None:
395 drop_variables = []
397 open_file = _FsspecFSFromFilepath(
398 filepath=path, reader_options=reader_options
--> 399 ).open_file()
400 f = h5py.File(open_file, mode="r")
402 if group is not None and group != "":
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/virtualizarr/utils.py:47, in _FsspecFSFromFilepath.open_file(self)
39 def open_file(self) -> OpenFileType:
40 """Calls `.open` on fsspec.Filesystem instantiation using self.filepath as an input.
41
42 Returns
(...) 45 file opened with fsspec
46 """
---> 47 return self.fs.open(self.filepath)
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/fsspec/spec.py:1310, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
1308 else:
1309 ac = kwargs.pop("autocommit", not self._intrans)
-> 1310 f = self._open(
1311 path,
1312 mode=mode,
1313 block_size=block_size,
1314 autocommit=ac,
1315 cache_options=cache_options,
1316 **kwargs,
1317 )
1318 if compression is not None:
1319 from fsspec.compression import compr
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/fsspec/implementations/local.py:201, in LocalFileSystem._open(self, path, mode, block_size, **kwargs)
199 if self.auto_mkdir and "w" in mode:
200 self.makedirs(self._parent(path), exist_ok=True)
--> 201 return LocalFileOpener(path, mode, fs=self, **kwargs)
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/fsspec/implementations/local.py:365, in LocalFileOpener.__init__(self, path, mode, autocommit, fs, compression, **kwargs)
363 self.compression = get_compression(path, compression)
364 self.blocksize = io.DEFAULT_BUFFER_SIZE
--> 365 self._open()
File ~/.conda/envs/env_virtualizarr/lib/python3.12/site-packages/fsspec/implementations/local.py:370, in LocalFileOpener._open(self)
368 if self.f is None or self.f.closed:
369 if self.autocommit or "w" not in self.mode:
--> 370 self.f = open(self.path, mode=self.mode)
371 if self.compression:
372 compress = compr[self.compression]
FileNotFoundError: [Errno 2] No such file or directory: 'wrfout_d01_2024-01-02_01%3A00%3A00.nc'
Hey there @georgiostha 👋
Thanks for the bug report. I tried this out with the latest changes on the develop branch and got:
FileNotFoundError: Object at location /Users/nrhagen/Documents/carbonplan/VirtualiZarr/air_temp%253A1.nc not found: No such file or directory (os error 2)
Debug source:
NotFound {
path: "/Users/nrhagen/Documents/carbonplan/VirtualiZarr/air_temp%253A1.nc",
source: Os {
code: 2,
kind: NotFound,
message: "No such file or directory",
},
}
It seems like validate_and_normalize_path_to_uri percent-encodes the `:`s to make compliant URIs (`:` becomes `%3A`; the `%253A` seen here looks like the same character encoded twice, since `%25` is the encoding of `%`).
@TomNicholas or @kylebarron any thoughts on how to deal with files with `:`s in their names?
I assume obstore/object_store expects to handle the normalization itself. Can you try removing that normalization from the virtualizarr side?
Can you try removing that normalization from the virtualizarr side?
Yeah we should just remove these checks. That would solve #582 also. @norlandrhagen would you have time to have a go at this?
This was fixed by https://github.com/zarr-developers/VirtualiZarr/pull/659