cola
cola copied to clipboard
dev抓取微博报错
抓取微博的时候,不知道为什么 parsers.py 的第 177 行会报错:mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title'])
以下是错误信息 D:\cola\contrib\weibo>init.py D:\cola\cola\core\opener.py:108: UserWarning: gzip transfer encoding is experimental! self.browser.set_handle_gzip(True) start to process priority: 0 process bundle from priority 0 get 3211200050 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=1418233717575000&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 D:\cola\cola\core\opener.py:108: UserWarning: gzip transfer encoding is experimental! self.browser.set_handle_gzip(True) start to process priority: 0 process bundle from priority 0 get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418233844764&pagebar=0&max_id=3778673397938545&page=1 Error when handle bundle: 3211200050, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=14182337175750 00&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse _options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range Error when handle bundle: 1898353550, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=1898353550&end_id=3786306393521083&_t=0&_k=14182337179320 00&__rnd=1418233844764&pagebar=0&max_id=3778673397938545&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse *options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, 
in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range get 3211200050 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=1418233717575000&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid= 1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418233844764& pagebar=0&max_id=3778673397938545&page=1 Error when handle bundle: 3211200050, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=14182337175750 00&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse *options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range Error when handle bundle: 1898353550, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=1898353550&end_id=3786306393521083&_t=0&_k=14182337179320 00&__rnd=1418233844764&pagebar=0&max_id=3778673397938545&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse *options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range get 3211200050 url: 
http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=1418233717575000&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418233844764&pagebar=0&max_id=3778673397938545&page=1 Error when handle bundle: 3211200050, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=14182337175750 00&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse *options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418234623885&pagebar=1&max_id=3734740953372321&page=1 get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=50&pre_page=1&uid=1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418234624721&page=2 get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=2&uid=1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418234625316&pagebar=0&max_id=3656888933158899&page=2 Error when handle bundle: 1898353550, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=2&uid=1898353550&end_id=3786306393521083&_t=0&_k=14182337179320 00&__rnd=1418234625316&pagebar=0&max_id=3656888933158899&page=2 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, 
options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse *options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range get 3211200050 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=1418233717575000&__rnd=1418233835289& pagebar=0&max_id=3751405376185938&page=1 Error when handle bundle: 3211200050, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=14182337175750 00&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse *_options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range get 1898353550 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=2&uid=1898353550&end_id=3786306393521083&_t=0&_k=1418233717932000&__rnd=1418234625316&pagebar=0&max_id=3656888933158899&page=2 get 3211200050 url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=1418233717575000&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 Error when handle bundle: 3211200050, url: http://weibo.com/aj/mblog/mbloglist?count=15&pre_page=1&uid=3211200050&end_id=3786010796038435&_t=0&_k=14182337175750 00&__rnd=1418233835289&pagebar=0&max_id=3751405376185938&page=1 list index out of range Traceback (most recent call last): File "D:\cola\cola\job\executor.py", line 519, in _parse_with_process_exception res = self._parse(parser_cls, options, bundle, url) File "D:\cola\cola\job\executor.py", line 442, in _parse 
**options).parse() File "D:\cola\contrib\weibo\parsers.py", line 177, in parse mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title']) IndexError: list index out of range
这是个历史遗留问题了,而且在有的账号上还无法重现。如果我这边不能重现问题的话,还需要你提供相关的原始文件之类的。
好,没问题 :)