pipdeptree icon indicating copy to clipboard operation
pipdeptree copied to clipboard

FR: Include check if package is C extension

Open mike-lawrence opened this issue 1 year ago • 0 comments

This SO answer provides code to determine if a package is pure-python or is a c-extension:

from importlib.machinery import ExtensionFileLoader, EXTENSION_SUFFIXES
import inspect
import logging
import os
import os.path
import pkgutil
from types import ModuleType
from typing import List

log = logging.getLogger(__name__)


def is_builtin_module(module: ModuleType) -> bool:
    """
    Is this module a built-in module, like ``os``?
    Method is as per :func:`inspect.getfile`.
    """
    return not hasattr(module, "__file__")


def is_module_a_package(module: ModuleType) -> bool:
    assert inspect.ismodule(module)
    return os.path.basename(inspect.getfile(module)) == "__init__.py"


def is_c_extension(module: ModuleType) -> bool:
    """
    Modified from
    https://stackoverflow.com/questions/20339053/in-python-how-can-one-tell-if-a-module-comes-from-a-c-extension.

    ``True`` only if the passed module is a C extension implemented as a
    dynamically linked shared library specific to the current platform.

    Args:
        module: Previously imported module object to be tested.

    Returns:
        bool: ``True`` only if this module is a C extension.

    Examples:

    .. code-block:: python

        from cardinal_pythonlib.modules import is_c_extension

        import os
        import _elementtree as et
        import numpy
        import numpy.core.multiarray as numpy_multiarray

        is_c_extension(os)  # False
        is_c_extension(numpy)  # False
        is_c_extension(et)  # False on my system (Python 3.5.6). True in the original example.
        is_c_extension(numpy_multiarray)  # True

    """  # noqa
    assert inspect.ismodule(module), f'"{module}" not a module.'

    # If this module was loaded by a PEP 302-compliant CPython-specific loader
    # loading only C extensions, this module is a C extension.
    if isinstance(getattr(module, '__loader__', None), ExtensionFileLoader):
        return True

    # If it's built-in, it's not a C extension.
    if is_builtin_module(module):
        return False

    # Else, fallback to filetype matching heuristics.
    #
    # Absolute path of the file defining this module.
    module_filename = inspect.getfile(module)

    # "."-prefixed filetype of this path if any or the empty string otherwise.
    module_filetype = os.path.splitext(module_filename)[1]

    # This module is only a C extension if this path's filetype is that of a
    # C extension specific to the current platform.
    return module_filetype in EXTENSION_SUFFIXES


def contains_c_extension(module: ModuleType,
                         import_all_submodules: bool = True,
                         include_external_imports: bool = False,
                         seen: List[ModuleType] = None,
                         verbose: bool = False) -> bool:
    """
    Extends :func:`is_c_extension` by asking: is this module, or any of its
    submodules, a C extension?

    Args:
        module: Previously imported module object to be tested.
        import_all_submodules: explicitly import all submodules of this module?
        include_external_imports: check modules in other packages that this
            module imports?
        seen: used internally for recursion (to deal with recursive modules);
            should be ``None`` when called by users
        verbose: show working via log?

    Returns:
        bool: ``True`` only if this module or one of its submodules is a C
        extension.

    Examples:

    .. code-block:: python

        import logging

        import _elementtree as et
        import os

        import arrow
        import alembic
        import django
        import numpy
        import numpy.core.multiarray as numpy_multiarray

        log = logging.getLogger(__name__)
        logging.basicConfig(level=logging.DEBUG)  # be verbose

        contains_c_extension(os)  # False
        contains_c_extension(et)  # False

        contains_c_extension(numpy)  # True -- different from is_c_extension()
        contains_c_extension(numpy_multiarray)  # True

        contains_c_extension(arrow)  # False

        contains_c_extension(alembic)  # False
        contains_c_extension(alembic, include_external_imports=True)  # True
        # ... this example shows that Alembic imports hashlib, which can import
        #     _hashlib, which is a C extension; however, that doesn't stop us (for
        #     example) installing Alembic on a machine with no C compiler

        contains_c_extension(django)

    """  # noqa
    assert inspect.ismodule(module), f'"{module}" not a module.'

    if seen is None:  # only true for the top-level call
        seen = []  # type: List[ModuleType]
    if module in seen:  # modules can "contain" themselves
        # already inspected; avoid infinite loops
        return False
    seen.append(module)

    # Check the thing we were asked about
    is_c_ext = is_c_extension(module)
    if verbose:
        log.info(f"Is module {module!r} a C extension? {is_c_ext}")
    if is_c_ext:
        return True
    if is_builtin_module(module):
        # built-in, therefore we stop searching it
        return False

    # Now check any children, in a couple of ways

    top_level_module = seen[0]
    top_path = os.path.dirname(top_level_module.__file__)

    # Recurse using dir(). This picks up modules that are automatically
    # imported by our top-level model. But it won't pick up all submodules;
    # try e.g. for django.
    for candidate_name in dir(module):
        candidate = getattr(module, candidate_name)
        # noinspection PyBroadException
        try:
            if not inspect.ismodule(candidate):
                # not a module
                continue
        except Exception:
            # e.g. a Django module that won't import until we configure its
            # settings
            log.error(f"Failed to test ismodule() status of {candidate!r}")
            continue
        if is_builtin_module(candidate):
            # built-in, therefore we stop searching it
            continue

        candidate_fname = getattr(candidate, "__file__")
        if not include_external_imports:
            if os.path.commonpath([top_path, candidate_fname]) != top_path:
                if verbose:
                    log.debug(f"Skipping, not within the top-level module's "
                              f"directory: {candidate!r}")
                continue
        # Recurse:
        if contains_c_extension(
                module=candidate,
                import_all_submodules=False,  # only done at the top level, below  # noqa
                include_external_imports=include_external_imports,
                seen=seen):
            return True

    if import_all_submodules:
        if not is_module_a_package(module):
            if verbose:
                log.debug(f"Top-level module is not a package: {module!r}")
            return False

        # Otherwise, for things like Django, we need to recurse in a different
        # way to scan everything.
        # See https://stackoverflow.com/questions/3365740/how-to-import-all-submodules.  # noqa
        log.debug(f"Walking path: {top_path!r}")
        try:
            for loader, module_name, is_pkg in pkgutil.walk_packages([top_path]):  # noqa
                if not is_pkg:
                    log.debug(f"Skipping, not a package: {module_name!r}")
                    continue
                log.debug(f"Manually importing: {module_name!r}")
                # noinspection PyBroadException
                try:
                    candidate = loader.find_module(module_name)\
                        .load_module(module_name)  # noqa
                except Exception:
                    # e.g. Alembic "autogenerate" gives: "ValueError: attempted
                    # relative import beyond top-level package"; or Django
                    # "django.core.exceptions.ImproperlyConfigured"
                    log.error(f"Package failed to import: {module_name!r}")
                    continue
                if contains_c_extension(
                        module=candidate,
                        import_all_submodules=False,  # only done at the top level  # noqa
                        include_external_imports=include_external_imports,
                        seen=seen):
                    return True
        except Exception:
            log.error("Unable to walk packages further; no C extensions "
                      "detected so far!")
            raise

    return False

I think it should be trivial to add as a feature of the output information by modifying the ReqPackage class to have the property:

    @property
    def is_c_extension(self):
        try:
            mod = importlib.import_module(self.project_name)
            return is_c_extension(mod)
        except:
            return 'unknown'

then modify its render_as_branch method to be:

   def render_as_branch(self, frozen):
        if not frozen:
            req_ver = self.version_spec if self.version_spec else 'Any'
            return (
                '{0} [required: {1}, installed: {2}, c: {3}]'
                ).format(self.project_name, req_ver, self.installed_version,self.is_c_extension)
        else:
            return self.render_as_root(frozen)

(with addition of importlib as an import)

Thought this might be a useful addition to the package.

mike-lawrence avatar Aug 11 '22 19:08 mike-lawrence