Add API to support overriding `protocol_config` in `WrappedFileSystemFlavour`
I know that WrappedFileSystemFlavour is an internal and experimental class, but I am in a situation where I need to override its protocol_config to support my custom UPath, which mimics the behavior of S3Path. Here is a minimal example to reproduce this, in which I want to create a UPath for a protocol called foo:
test.py
import fsspec # type: ignore [import-untyped]
from fsspec.implementations.arrow import ArrowFSWrapper # type: ignore [import-untyped]
from fsspec.utils import infer_storage_options # type: ignore [import-untyped]
from upath import UPath
from upath import registry as upath_registry
from upath._flavour import WrappedFileSystemFlavour
from upath.implementations.cloud import S3Path
class FooFileSystem(ArrowFSWrapper):
    """fsspec filesystem for the ``foo`` protocol, wrapping pyarrow's ``S3FileSystem``."""

    protocol = "foo"

    def __init__(self, *args, **kwargs):
        """Wrap a default-constructed ``pyarrow.fs.S3FileSystem``.

        Positional arguments are accepted but not forwarded; keyword
        arguments are passed on to ``ArrowFSWrapper.__init__``.
        """
        # Local import: pyarrow is only imported when an instance is created.
        from pyarrow.fs import S3FileSystem  # type: ignore [import-untyped]

        fs = S3FileSystem()
        super().__init__(fs=fs, **kwargs)

    @classmethod
    def _strip_protocol(cls, path: str) -> str:
        """Return ``path`` without its protocol, keeping the host as the first part."""
        # upstream fsspec has hardcoded `host + path` for s3/s3a we need this for `foo` as well.
        storage_opts = infer_storage_options(path)
        if host := storage_opts.get("host"):
            storage_opts["path"] = host + storage_opts["path"]
        path_without_protocol = str(storage_opts["path"])
        if path_without_protocol.startswith("//"):
            # special case for "hdfs://path" (without the triple slash)
            path_without_protocol = path_without_protocol[1:]
        return path_without_protocol
class FooPath(S3Path):
    """Path class for the ``foo`` protocol; reuses ``S3Path`` unchanged."""
# Register the filesystem with fsspec and the path class with universal-pathlib,
# both under the "foo" protocol.
fsspec.register_implementation("foo", FooFileSystem)
upath_registry.register_implementation("foo", FooPath)
# Constructing the path is what fails with the ValueError shown in the traceback.
path = UPath("foo://bar/baz")
throws the following error
Traceback (most recent call last):
File "/Users/rahuliyer/test.py", line 47, in <module>
path = UPath("foo://bar/baz")
File "/Users/rahuliyer/.venv/lib/python3.10/site-packages/upath/implementations/cloud.py", line 92, in __init__
raise ValueError("non key-like path provided (bucket/container missing)")
ValueError: non key-like path provided (bucket/container missing)
The same works if I override protocol_config in the following manner before initializing the UPath
WrappedFileSystemFlavour.protocol_config["netloc_is_anchor"] |= {"foo"}
WrappedFileSystemFlavour.protocol_config["supports_empty_parts"] |= {"foo"}
What's the best approach here to get this working without having to override protocol_config of an internal class?
Hi @rahuliyer95,
Thanks for opening the issue for discussion! After initial work on moving universal-pathlib to pathlib_abc.PathBase I think it makes sense to expose WrappedFileSystemFlavour as FSSpecParser exactly for use cases like yours. I'll rework parts of the API and align it with #270 so that your code will be forward compatible with the next universal-pathlib releases.
It will then look somewhat like this:
# THIS IS AN EXAMPLE FOR A FUTURE UPATH VERSION, THE ACTUAL WAY TO DO THIS MIGHT CHANGE
from fsspec.implementations.arrow import ArrowFSWrapper
from upath import UPath
from upath.core import FSSpecParser
from upath.implementations.cloud import S3Path
class FooFileSystem(ArrowFSWrapper):
    protocol = "foo"
    # Remainder of the implementation is elided in this example.
    ...
class FooPath(S3Path):
    # The keyword flags carry the same names as the `protocol_config` keys
    # that otherwise had to be patched for "foo".
    parser = FSSpecParser(
        FooFileSystem,
        netloc_is_anchor=True,
        supports_empty_parts=True,
    )
Let me know what you think, Cheers, Andreas 😃
Hi @rahuliyer95,
Thanks for opening the issue for discussion! After initial work on moving universal-pathlib to pathlib_abc.PathBase I think it makes sense to expose WrappedFileSystemFlavour as FSSpecParser exactly for use cases like yours. I'll rework parts of the API and align it with #270 so that your code will be forward compatible with the next universal-pathlib releases.
It will then look somewhat like this:
# THIS IS AN EXAMPLE FOR A FUTURE UPATH VERSION, THE ACTUAL WAY TO DO THIS MIGHT CHANGE
from fsspec.implementations.arrow import ArrowFSWrapper
from upath import UPath
from upath.core import FSSpecParser
from upath.implementations.cloud import S3Path
class FooFileSystem(ArrowFSWrapper):
    protocol = "foo"
    ...
class FooPath(S3Path):
    parser = FSSpecParser(FooFileSystem, netloc_is_anchor=True, supports_empty_parts=True)
Let me know what you think, Cheers, Andreas 😃
This makes a lot of sense to me, looking forward to using this as soon as it's released. Thanks!
@ap-- this is how I got it working with the latest version, not sure if the APIs should be made public?
import fsspec # type: ignore [import-untyped]
from fsspec.implementations.arrow import ArrowFSWrapper # type: ignore [import-untyped]
from fsspec.utils import infer_storage_options # type: ignore [import-untyped]
from upath import UPath
from upath import registry as upath_registry
from upath._flavour import WrappedFileSystemFlavour
from upath._flavour_sources import AbstractFileSystemFlavour
from upath.implementations.cloud import S3Path
from upath.types import JoinablePathLike
class FooFileSystem(ArrowFSWrapper):
    """fsspec filesystem for the ``foo`` protocol, wrapping pyarrow's ``S3FileSystem``."""

    protocol = "foo"

    def __init__(self, *args, **kwargs):
        """Wrap a default-constructed ``pyarrow.fs.S3FileSystem``.

        Positional arguments are accepted but not forwarded; keyword
        arguments are passed on to ``ArrowFSWrapper.__init__``.
        """
        # Local import: pyarrow is only imported when an instance is created.
        from pyarrow.fs import S3FileSystem  # type: ignore [import-untyped]

        fs = S3FileSystem()
        super().__init__(fs=fs, **kwargs)

    @classmethod
    def _strip_protocol(cls, path: str) -> str:
        """Return ``path`` without its protocol, keeping the host as the first part."""
        # upstream fsspec has hardcoded `host + path` for s3/s3a we need this for `foo` as well.
        storage_opts = infer_storage_options(path)
        if host := storage_opts.get("host"):
            storage_opts["path"] = host + storage_opts["path"]
        path_without_protocol = str(storage_opts["path"])
        if path_without_protocol.startswith("//"):
            # special case for "hdfs://path" (without the triple slash)
            path_without_protocol = path_without_protocol[1:]
        return path_without_protocol
class FooFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour definition for the ``foo`` protocol."""

    protocol = "foo"
    root_marker = ""
    sep = "/"
class FooPath(S3Path):
    """Path class for the ``foo`` protocol, built on ``S3Path``."""

    # The keyword flags carry the same names as the `protocol_config` keys
    # ("netloc_is_anchor", "supports_empty_parts") that otherwise had to be
    # patched on WrappedFileSystemFlavour for "foo".
    parser = WrappedFileSystemFlavour(
        FooFileSystemFlavour,
        netloc_is_anchor=True,
        supports_empty_parts=True,
    )
# Register the filesystem with fsspec and the path class with universal-pathlib,
# both under the "foo" protocol.
fsspec.register_implementation("foo", FooFileSystem)
upath_registry.register_implementation("foo", FooPath)