astroid
astroid copied to clipboard
ZipFinder is overzealous in searching for imports
This is perhaps best documented with the code I've used to find the flaws.
from astroid.modutils import file_info_from_modpath
import sys
import os
# Build the zip_test.zip fixture (a handful of test modules and packages)
# the first time the script runs; later runs reuse the existing file.
if not os.path.exists("zip_test.zip"):
    import zipfile

    print("creating zip_test.zip")
    # (archive member name, member contents), written in this order.
    members = (
        ("zip_member.py", "var = 'member'"),
        ("zip_pkg/__init__.py", "var = 'pkg'"),
        ("zip_pkg/pkg_mod.py", "var = 'pkg/mod'"),
        ("zip_pkg/pkg_inv/inv_mod.py", "var = 'pkg/inv/inv'"),
        ("zip_ns/ns_inv.py", "var = 'ns/inv'"),
        ("zip_ns/ns_pkg/__init__.py", "var = 'ns/pkg'"),
        ("zip_ns/ns_pkg/ns_mod.py", "var = 'ns/pkg/mod'"),
        # It appears zipimport requires namespace directories to have an
        # entry in the zip file. Is that standard with zip files?
        ("zip_ns/", ""),
    )
    with zipfile.ZipFile("zip_test.zip", "x") as archive:
        for member_name, member_source in members:
            archive.writestr(member_name, member_source)
# Test if a given module can be found with the given path
def test(mod, path):
    """Look up ``mod`` via ``file_info_from_modpath`` and print the outcome.

    ``mod`` is a dotted module name; it is split on "." before the lookup.
    ``path`` is forwarded unchanged as the ``path`` keyword argument.

    Prints "<mod>, <path>: not found" when the lookup raises ImportError.
    Otherwise prints the reported type and a shortened location: relative
    to this script's directory, or prefixed with "sys" when the location is
    inside the standard library directory.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    # Derive the standard library directory from the ``os`` module itself
    # rather than assuming it sits at a fixed index of sys.path, which does
    # not hold on every Python install.
    stdlib_dir = os.path.dirname(os.__file__)
    try:
        info = file_info_from_modpath(mod.split('.'), path=path)
    except ImportError:
        print(f"{mod}, {path}: not found")
        return
    # The type may expose a ``name`` attribute or be a plain value (the
    # recorded output shows both "PY_ZIPMODULE" and a bare 1).
    display_type = getattr(info.type, 'name', info.type)
    location = info.location
    if location is not None and location.startswith(current_dir):
        # +1 also drops the path separator that follows the directory.
        display_path = location[len(current_dir) + 1:]
    elif location is not None and location.startswith(stdlib_dir):
        display_path = "sys" + location[len(stdlib_dir):]
    else:
        display_path = location
    print(f"{mod}, {path}: type {display_type} found at {display_path}")
# The calls below run in a deliberate order: later results depend on paths
# cached by earlier calls. Each trailing comment is the verdict on the line
# that call prints.
# NOTE: with the exception of the ns_pkg, all of the zip imports are
# reporting paths without the .py suffix
test("xml", ["/invalid"]) # incorrect, should not be found
test("xml", []) # incorrect, should not be found
print()
# The next three are unrelated to ZipFinder, but worth noting.
test("os.path", ["/invalid"]) # incorrect, should not be found, type is int(1)
test("os.path", []) # incorrect, should not be found, type is int(1)
test("os.path", None) # incorrect, type is int(1)
print()
test("zip_member", None) # ok, not found
test("zip_member", ["zip_test.zip"]) # ok, found
# ZipFinder cached the path to the zip provided and now searches it
print()
test("zip_member", None) # incorrect, should not be found
test("zip_pkg", None) # incorrect, should not be found
test("zip_pkg.pkg_mod", None) # incorrect, should not be found
print()
test("zip_pkg", ["zip_test.zip"]) # incorrect, path does not go to __init__.py
test("zip_pkg.pkg_mod", ["zip_test.zip"]) # ok
test("zip_ns", ["zip_test.zip"]) # ok
# Only the first level is checked for actually being a package
test("zip_pkg.pkg_inv.inv_mod", ["zip_test.zip"]) # incorrect, not importable
# The check doesn't allow namespace packages, which means these fail
test("zip_ns.ns_pkg.ns_mod", ["zip_test.zip"]) # incorrect, not found
test("zip_ns.ns_pkg", ["zip_test.zip"]) # incorrect, type is None
# The path to the namespace package was cached and ZipFinder now searches it
test("ns_pkg", None) # incorrect, should not be found
print()
# Path-like names containing "/" are accepted, which doesn't really make
# much sense.
test("zip_pkg/pkg_mod", ["zip_test.zip"]) # incorrect, invalid module name
# The normal finder also suffers from this
test("urllib/request", None) # incorrect, invalid module name
print()
test("pkg_mod", None) # ok, not found
test("ns_mod", None) # ok, not found
# Importing a package should not affect the module found but the import
# machinery adds imports to sys.path_importer_cache, the same cache used
# by the ZipFinder.
sys.path.append("zip_test.zip")
import zip_pkg.pkg_mod
import zip_ns.ns_pkg.ns_mod
test("pkg_mod", None) # incorrect, should not be found
test("ns_mod", None) # incorrect, should not be found
Output
xml, ['/invalid']: type PY_ZIPMODULE found at sys/xml
xml, []: type PY_ZIPMODULE found at sys/xml
os.path, ['/invalid']: type 1 found at sys/posixpath.py
os.path, []: type 1 found at sys/posixpath.py
os.path, None: type 1 found at sys/posixpath.py
zip_member, None: not found
zip_member, ['zip_test.zip']: type PY_ZIPMODULE found at zip_test.zip/zip_member
zip_member, None: type PY_ZIPMODULE found at zip_test.zip/zip_member
zip_pkg, None: type PY_ZIPMODULE found at zip_test.zip/zip_pkg
zip_pkg.pkg_mod, None: type PY_ZIPMODULE found at zip_test.zip/zip_pkg/pkg_mod
zip_pkg, ['zip_test.zip']: type PY_ZIPMODULE found at zip_test.zip/zip_pkg
zip_pkg.pkg_mod, ['zip_test.zip']: type PY_ZIPMODULE found at zip_test.zip/zip_pkg/pkg_mod
zip_ns, ['zip_test.zip']: type PY_NAMESPACE found at None
zip_pkg.pkg_inv.inv_mod, ['zip_test.zip']: type PY_ZIPMODULE found at zip_test.zip/zip_pkg/pkg_inv/inv_mod
zip_ns.ns_pkg.ns_mod, ['zip_test.zip']: not found
zip_ns.ns_pkg, ['zip_test.zip']: type None found at zip_test.zip/zip_ns/ns_pkg/__init__.py
ns_pkg, None: type PY_ZIPMODULE found at zip_test.zip/zip_ns/ns_pkg
zip_pkg/pkg_mod, ['zip_test.zip']: type PY_ZIPMODULE found at zip_test.zip/zip_pkg/pkg_mod
urllib/request, None: type PY_SOURCE found at sys/urllib/request.py
pkg_mod, None: not found
ns_mod, None: not found
pkg_mod, None: type PY_ZIPMODULE found at zip_test.zip/zip_pkg/pkg_mod
ns_mod, None: type PY_ZIPMODULE found at zip_test.zip/zip_ns/ns_pkg/ns_mod
The TL;DR is that ZipFinder searches every path ever, even if it isn't a zip file, even if it isn't supposed to search any path in the first place. There's no filter, and the passed path argument is only used to add more paths to search. It also doesn't properly handle namespace packages inside zip files.
(tested on commit 3f0fdc5d "version" 2.0.4 using Python 3.7)
Thanks for creating an issue! As mentioned in the documentation of the function, path is not a filter; it is used for adding more paths to search. Apart from some of the zip searching not working as expected, the others look alright to me, e.g.
test("xml", ["/invalid"]) # incorrect, should not be found
test("xml", []) # incorrect, should not be found
Not sure why you think these are incorrect, it is finding the xml package from the standard library.
If you compare it to, say, test("xml", None), you'll notice both the path and type are incorrect. The standard xml module, at least on my system, is not a zip module. Also, because ZipFinder is later in the chain, the result appears because the ImpFinder didn't find it. path may not be clearly documented as a filter, but it's used as a filter, for example in get_module_part, and the other finders respect it as a filter.