scrapyrt
scrapyrt copied to clipboard
Add package support and support for launching via `python -m scrapyrt`
Also reflect Python 3.12 support; add environment-variable fallbacks as defaults for common CLI arguments; and fix an `os.path.join` call
so that it is properly cross-platform.
As for the `--package` CLI argument: suppose, for example, that you run `scrapy startproject tutorial`
inside your existing package hierarchy:
/tmp$ mkdir package_name && cd "$_"
/tmp/package_name$ touch setup.py
/tmp/package_name$ mkdir package_name && cd "$_"
/tmp/package_name/package_name$ touch __init__.py
/tmp/package_name/package_name$ scrapy startproject tutorial
/tmp/package_name$ touch package_name/tutorial/__init__.py
/tmp/package_name$ curl -L https://raw.githubusercontent.com/scrapinghub/sample-projects/master/quotes_crawler/quotes_crawler/spiders/toscrape-infinite-scrolling.py -o package_name/tutorial/tutorial/spiders/toscrape-infinite-scrolling.py
/tmp/package_name$ tree --charset=ascii
.
|-- package_name
| |-- __init__.py
| `-- tutorial
| |-- __init__.py
| |-- scrapy.cfg
| `-- tutorial
| |-- __init__.py
| |-- items.py
| |-- middlewares.py
| |-- pipelines.py
| |-- settings.py
| `-- spiders
| |-- __init__.py
| `-- toscrape-infinite-scrolling.py
`-- setup.py
Then you can ensure `scrapy.cfg` gets installed when you run `python -m pip install .`
or `python -m pip install -e .` by using this Python 3.12-compatible `setup.py`:
import os
import sys
# NOTE(review): ast.Str is deprecated and none of these ast names appear to be
# used in the visible code -- confirm they are still needed.
from ast import Assign, Constant, Str, parse
from functools import partial
from operator import attrgetter
from os import path
from os.path import extsep

from setuptools import find_packages, setup
if sys.version_info[:2] >= (3, 12):
    # distutils is not available from Python 3.12 onwards, so this branch
    # provides a local stand-in for ``distutils.sysconfig.get_python_lib``.
    import os
    from sysconfig import get_python_version

    # Normalised installation prefixes. These are computed from the public
    # ``sys`` attributes instead of importing sysconfig's private
    # underscore-prefixed constants, which are not a supported API.
    BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix)
    BASE_PREFIX = os.path.normpath(sys.base_prefix)
    EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
    PREFIX = os.path.normpath(sys.prefix)

    class DistutilsPlatformError(Exception):
        """Raised when the library directory is unknown for ``os.name``."""

    def is_virtual_environment():
        """
        Whether one is in a virtual environment

        :return: True when ``sys.prefix`` differs from ``sys.base_prefix`` or
            when ``sys.real_prefix`` exists
        :rtype: ```bool```
        """
        return sys.base_prefix != sys.prefix or hasattr(sys, "real_prefix")

    def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
        """Return the directory containing the Python library (standard or
        site additions).

        If 'plat_specific' is true, return the directory containing
        platform-specific modules, i.e. any module from a non-pure-Python
        module distribution; otherwise, return the platform-shared library
        directory. If 'standard_lib' is true, return the directory
        containing standard Python library modules; otherwise, return the
        directory for site-specific modules.

        If 'prefix' is supplied (anything other than None), it is used
        verbatim as the root of the returned path instead of one of the
        interpreter prefixes.

        :param plat_specific: truthy to select the platform-specific location
        :param standard_lib: truthy to select the standard-library location
        :param prefix: root directory to build the path from, or None
        :return: the computed library directory
        :rtype: ```str```
        :raises DistutilsPlatformError: if ``os.name`` is neither "posix"
            nor "nt"
        """
        # An empty/None prefix counts as "default", as do /usr and /usr/local.
        is_default_prefix = not prefix or os.path.normpath(prefix) in (
            "/usr",
            "/usr/local",
        )
        if prefix is None:
            if standard_lib:
                prefix = BASE_EXEC_PREFIX if plat_specific else BASE_PREFIX
            else:
                prefix = EXEC_PREFIX if plat_specific else PREFIX

        if os.name == "posix":
            # Platform-specific modules and the standard library live under
            # sys.platlibdir; pure-Python site modules live under "lib".
            libdir = sys.platlibdir if (plat_specific or standard_lib) else "lib"
            libpython = os.path.join(prefix, libdir, "python" + get_python_version())
            if standard_lib:
                return libpython
            if is_default_prefix and not is_virtual_environment():
                # NOTE(review): the dist-packages layout looks
                # Debian/Ubuntu-specific -- confirm it is intended elsewhere.
                return os.path.join(prefix, "lib", "python3", "dist-packages")
            return os.path.join(libpython, "site-packages")

        if os.name == "nt":
            if standard_lib:
                return os.path.join(prefix, "Lib")
            return os.path.join(prefix, "Lib", "site-packages")

        raise DistutilsPlatformError(
            "I don't know where Python installs its library "
            "on platform '%s'" % os.name
        )

else:
    from distutils.sysconfig import get_python_lib
# Name of the top-level package: passed to ``setup(name=...)`` and joined into
# both the source-tree and install-directory paths built by ``to_funcs``.
package_name = "package_name"
def to_funcs(*paths):
    """
    Build a pair of path-joining callables rooted at ``package_name``/``paths``.

    The first callable joins its arguments onto the package folder that sits
    next to this file; the second joins them onto the same relative folder
    under the directory returned by ``get_python_lib(prefix="")``.

    :param paths: one or more str, referring to relative folder names
    :type paths: ```str```

    :return: ``(local_join, install_join)``; each accepts zero or more extra
        path components and returns the joined path
    :rtype: ```Tuple[Callable[..., str], Callable[..., str]]```
    """
    source_root = path.dirname(__file__)
    install_root = get_python_lib(prefix="")
    local_join = partial(path.join, source_root, package_name, *paths)
    install_join = partial(path.join, install_root, package_name, *paths)
    return local_join, install_join
def main():
    """Main function for setup.py; this actually does the installation.

    Besides the Python packages found by ``find_packages()``, every regular
    file directly inside the ``tutorial`` project folder whose name does not
    end in ``.py`` (e.g. ``scrapy.cfg``) is registered in ``data_files``.
    """
    tutorial_join, tutorial_install_dir = to_funcs("tutorial")

    # Non-Python files at the top of the project folder; sub-directories and
    # ``.py`` sources are skipped.
    non_python_files = [
        candidate
        for candidate in map(tutorial_join, os.listdir(tutorial_join()))
        if path.isfile(candidate) and not candidate.endswith(".py")
    ]

    setup(
        name=package_name,
        packages=find_packages(),
        package_dir={package_name: package_name},
        classifiers=[],
        python_requires=">=3.8",
        entry_points={
            # NOTE(review): this dotted path does not appear to match the
            # package_name/tutorial/tutorial/spiders layout, and the module
            # name contains hyphens -- confirm the target is importable.
            "scrapy.commands": [
                "scroll=package_name.package_name.tutorial.spiders.toscrape-infinite-scrolling:ToScrapeInfiniteScrollingSpider",
            ],
        },
        data_files=[
            (tutorial_install_dir(), non_python_files),
        ],
    )
def setup_py_main():
    """Invoke ``main`` only when this module runs as ``__main__``."""
    if __name__ != "__main__":
        # Imported rather than executed as a script: do nothing.
        return
    main()


setup_py_main()
As of this PR, that setup enables the following to work:
$ scrapyrt --package package_name.tutorial