scrapyrt icon indicating copy to clipboard operation
scrapyrt copied to clipboard

Add package support and support for launching via `python -m scrapyrt`

Open SamuelMarks opened this issue 8 months ago • 0 comments

Also reflect Python 3.12 support; add environment-variable fallbacks as defaults for common CLI arguments; and fix an `os.path.join` call so that it is properly cross-platform.

As for the `--package` CLI argument: suppose you run `scrapy startproject tutorial` inside your existing package hierarchy, for example:

/tmp$ mkdir package_name && cd "$_"
/tmp/package_name$ touch setup.py
/tmp/package_name$ mkdir package_name && cd "$_"
/tmp/package_name/package_name$ touch __init__.py
/tmp/package_name/package_name$ scrapy startproject tutorial
/tmp/package_name/package_name$ cd ..
/tmp/package_name$ touch package_name/tutorial/__init__.py
/tmp/package_name$ curl -L https://raw.githubusercontent.com/scrapinghub/sample-projects/master/quotes_crawler/quotes_crawler/spiders/toscrape-infinite-scrolling.py -o package_name/tutorial/tutorial/spiders/toscrape-infinite-scrolling.py
/tmp/package_name$ tree --charset=ascii
.
|-- package_name
|   |-- __init__.py
|   `-- tutorial
|       |-- __init__.py
|       |-- scrapy.cfg
|       `-- tutorial
|           |-- __init__.py
|           |-- items.py
|           |-- middlewares.py
|           |-- pipelines.py
|           |-- settings.py
|           `-- spiders
|               |-- __init__.py
|               `-- toscrape-infinite-scrolling.py
`-- setup.py

Then you can ensure that `scrapy.cfg` gets installed when you run `python -m pip install .` or `python -m pip install -e .` by using this Python 3.12-compatible `setup.py`:

import sys
from ast import Assign, Constant, Str, parse
from functools import partial
from operator import attrgetter
from os import path
from os.path import extsep

from setuptools import find_packages, setup

if sys.version_info[:2] >= (3, 12):
    # distutils is no longer part of the standard library from Python 3.12
    # on, so a local stand-in for `distutils.sysconfig.get_python_lib` is
    # defined in this branch.
    import os
    from sysconfig import get_python_version

    # Normalised interpreter prefixes, derived from the public `sys`
    # attributes instead of sysconfig's private underscore-prefixed names.
    PREFIX = os.path.normpath(sys.prefix)
    EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
    BASE_PREFIX = os.path.normpath(sys.base_prefix)
    BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix)

    def is_virtual_environment():
        """
        Whether one is in a virtual environment

        :return: True when `sys.prefix` differs from `sys.base_prefix`, or
            when `sys` carries a `real_prefix` attribute
        :rtype: ```bool```
        """
        return sys.base_prefix != sys.prefix or hasattr(sys, "real_prefix")

    def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
        """Return the directory containing the Python library (standard or
        site additions).

        If 'plat_specific' is true, return the directory containing
        platform-specific modules, i.e. any module from a non-pure-Python
        module distribution; otherwise, return the platform-shared library
        directory.  If 'standard_lib' is true, return the directory
        containing standard Python library modules; otherwise, return the
        directory for site-specific modules.

        If 'prefix' is supplied, use it instead of sys.base_prefix or
        sys.base_exec_prefix -- i.e., ignore 'plat_specific'.

        :param plat_specific: truthy to select the platform-specific directory
        :param standard_lib: truthy to select the standard-library directory
        :param prefix: installation prefix; `None` selects one of the
            interpreter's own prefixes
        :return: the library directory, built on top of `prefix`
        :rtype: ```str```
        :raises Exception: a locally defined `DistutilsPlatformError` when
            `os.name` is neither "posix" nor "nt"
        """
        # Computed before the `None` default is filled in, so an empty or
        # missing prefix counts as "default" here.
        is_default_prefix = not prefix or os.path.normpath(prefix) in (
            "/usr",
            "/usr/local",
        )
        if prefix is None:
            if standard_lib:
                prefix = BASE_EXEC_PREFIX if plat_specific else BASE_PREFIX
            else:
                prefix = EXEC_PREFIX if plat_specific else PREFIX

        if os.name == "posix":
            if plat_specific or standard_lib:
                # Platform-specific modules (any module from a non-pure-Python
                # module distribution) or standard Python library modules.
                libdir = sys.platlibdir
            else:
                # Pure Python
                libdir = "lib"
            libpython = os.path.join(prefix, libdir, "python" + get_python_version())
            if standard_lib:
                return libpython
            if is_default_prefix and not is_virtual_environment():
                # NOTE(review): "dist-packages" looks like a Debian-style
                # layout; confirm this is the intended directory on other
                # POSIX platforms.
                return os.path.join(prefix, "lib", "python3", "dist-packages")
            return os.path.join(libpython, "site-packages")

        if os.name == "nt":
            if standard_lib:
                return os.path.join(prefix, "Lib")
            return os.path.join(prefix, "Lib", "site-packages")

        class DistutilsPlatformError(Exception):
            """DistutilsPlatformError"""

        raise DistutilsPlatformError(
            "I don't know where Python installs its library "
            "on platform '%s'" % os.name
        )

else:
    from distutils.sysconfig import get_python_lib

# Name of the top-level package: passed to `setup()` as the distribution name
# and used as a path component when locating and installing its files.
package_name = "package_name"


def to_funcs(*paths):
    """
    Build a pair of path-joining callables rooted at `paths`.

    The first callable joins onto the package folder next to this file; the
    second joins onto the package folder below the (relative) library
    directory reported by `get_python_lib(prefix="")`.

    :param paths: one or more str, referring to relative folder names
    :type paths: ```*paths```

    :return: 2 functions, each accepting further path components
    :rtype: ```Tuple[Callable[..., str], Callable[..., str]]```
    """
    source_root = path.dirname(__file__)
    install_root = get_python_lib(prefix="")

    join_in_source = partial(path.join, source_root, package_name, *paths)
    join_in_install = partial(path.join, install_root, package_name, *paths)
    return join_in_source, join_in_install


def main():
    """Main function for setup.py; this actually does the installation

    Besides the Python packages, every regular non-``.py`` file found directly
    inside the ``tutorial`` folder is registered through `data_files`, so it
    is copied next to the installed package.
    """
    # Imported locally: at module level `os` is only bound on the
    # Python >= 3.12 branch, so relying on it here fails on older versions.
    from os import listdir

    tutorial_join, tutorial_install_dir = to_funcs("tutorial")

    # Regular files directly inside the tutorial folder, excluding Python
    # sources (those are handled by `packages`).
    non_python_files = [
        candidate
        for candidate in map(tutorial_join, listdir(tutorial_join()))
        if path.isfile(candidate) and not candidate.endswith(".py")
    ]

    setup(
        name=package_name,
        packages=find_packages(),
        package_dir={package_name: package_name},
        classifiers=[],
        python_requires=">=3.8",
        entry_points={
            # NOTE(review): this target repeats `package_name` and contains
            # hyphens in the module name; verify that it resolves to an
            # importable module.
            "scrapy.commands": [
                "scroll=package_name.package_name.tutorial.spiders.toscrape-infinite-scrolling:ToScrapeInfiniteScrollingSpider",
            ],
        },
        data_files=[
            (tutorial_install_dir(), non_python_files),
        ],
    )


def setup_py_main():
    """Run `main` only when this file is executed as a script."""
    if __name__ != "__main__":
        return
    main()


setup_py_main()

Which, as of this PR, would enable this to work:

$ scrapyrt --package package_name.tutorial

SamuelMarks avatar Nov 02 '23 07:11 SamuelMarks