scrapy_for_zh_wiki
The spider code raises the following errors:
2023-05-24 15:44:49 [scrapy.core.scraper] ERROR: Error downloading <GET https://zh.wikipedia.org/wiki/%E8%A1%8C%E7%A8%8B>
Traceback (most recent call last):
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 190, in run_and_cache
    result = self.get(namespace=namespace, key=key_args)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 93, in get
    raise KeyError("namespace: " + namespace + " key: " + repr(key))
KeyError: "namespace: publicsuffix.org-tlds key: {'urls': ('https://publicsuffix.org/list/public_suffix_list.dat', 'https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat'), 'fallback_to_snapshot': True}"
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 190, in run_and_cache
    result = self.get(namespace=namespace, key=key_args)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 93, in get
    raise KeyError("namespace: " + namespace + " key: " + repr(key))
KeyError: "namespace: urls key: {'url': 'https://publicsuffix.org/list/public_suffix_list.dat'}"
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/twisted/internet/defer.py", line 1697, in _inlineCallbacks
    result = context.run(gen.send, result)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/scrapy/core/downloader/middleware.py", line 64, in process_response
    method(request=request, response=response, spider=spider)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/scrapy/downloadermiddlewares/cookies.py", line 73, in process_response
    self._process_cookies(cookies, jar=jar, request=request)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/scrapy/downloadermiddlewares/cookies.py", line 44, in _process_cookies
    if cookie_domain and _is_public_domain(cookie_domain):
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/scrapy/downloadermiddlewares/cookies.py", line 19, in _is_public_domain
    parts = _split_domain(domain)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/tldextract.py", line 233, in __call__
    suffix_index = self._get_tld_extractor().suffix_index(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/tldextract.py", line 274, in _get_tld_extractor
    public_tlds, private_tlds = get_suffix_lists(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/suffix_list.py", line 55, in get_suffix_lists
    return cache.run_and_cache(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 192, in run_and_cache
    result = func(**kwargs)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/suffix_list.py", line 72, in _get_suffix_lists
    text = find_first_response(cache, urls, cache_fetch_timeout=cache_fetch_timeout)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/suffix_list.py", line 30, in find_first_response
    return cache.cached_fetch_url(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 199, in cached_fetch_url
    return self.run_and_cache(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 192, in run_and_cache
    result = func(**kwargs)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/tldextract/cache.py", line 209, in _fetch_url
    response = session.get(url, timeout=timeout)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/requests/sessions.py", line 600, in get
    return self.request("GET", url, **kwargs)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/requests/sessions.py", line 587, in request
    resp = self.send(prep, **send_kwargs)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/requests/sessions.py", line 701, in send
    r = adapter.send(request, **kwargs)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/requests/adapters.py", line 455, in send
    conn = self.get_connection(request.url, proxies)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/requests/adapters.py", line 352, in get_connection
    conn = proxy_manager.connection_from_url(url)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/poolmanager.py", line 299, in connection_from_url
    return self.connection_from_host(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/poolmanager.py", line 500, in connection_from_host
    return super(ProxyManager, self).connection_from_host(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/poolmanager.py", line 246, in connection_from_host
    return self.connection_from_context(request_context)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/poolmanager.py", line 261, in connection_from_context
    return self.connection_from_pool_key(pool_key, request_context=request_context)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/poolmanager.py", line 282, in connection_from_pool_key
    pool = self._new_pool(scheme, host, port, request_context=request_context)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/poolmanager.py", line 214, in _new_pool
    return pool_cls(host, port, **request_context)
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/connectionpool.py", line 938, in __init__
    HTTPConnectionPool.__init__(
  File "/Users/luhongyang/opt/anaconda3/python.app/Contents/lib/python3.9/site-packages/urllib3/connectionpool.py", line 198, in __init__
    self.pool = self.QueueCls(maxsize)
TypeError: LifoQueue() takes no arguments
These errors are all quite strange: they are raised inside library code, so the author's own code should be fine. Could you share which library versions you were running at the time, and what I need to create under the project directory? Thanks a lot!!!!
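Reading the traceback, the chain appears to be: Scrapy's cookies middleware asks tldextract whether the cookie domain is a public suffix, tldextract tries to download the public suffix list through requests/urllib3, and that download dies with `TypeError: LifoQueue() takes no arguments` inside urllib3's connection pool (here going through the proxy path); the two `KeyError`s above it are just tldextract's cache reporting the failed fetch. If the crawl does not actually need cookies, a minimal workaround sketch, assuming nothing in this project depends on them, is to switch the cookies middleware off so this code path is never entered:

```python
# settings.py -- possible workaround sketch, not a confirmed fix.
# Assumption: the spider does not rely on cookies. With the cookies
# middleware disabled, Scrapy never calls _is_public_domain(), so the
# failing tldextract/urllib3 download is never triggered.
COOKIES_ENABLED = False
```

And since the question is about versions, a small dump of the libraries that appear in the traceback would make the two environments easy to compare (plain Python, no project-specific assumptions):

```python
# Print the version of every library that shows up in the traceback.
import requests
import scrapy
import tldextract
import twisted
import urllib3

for mod in (scrapy, tldextract, requests, urllib3, twisted):
    print(mod.__name__, getattr(mod, "__version__", "unknown"))
```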