JiaHongmei
JiaHongmei
我试了按天爬取，但是爬到某个时间点后，它就开始重复爬取相同的评论。
def parse_by_hour(self, response): """以小时为单位筛选""" keyword = response.meta.get('keyword') is_empty = response.xpath( '//div[@class="card card-no-result s-pt20b40"]') if is_empty: print('当前页面搜索结果为空') else: # 解析当前页面 for weibo in self.parse_weibo(response): self.check_environment() yield weibo next_url = response.xpath( '//a[@class="next"]/@href').extract_first()...
def parse_by_hour(self, response): """以小时为单位筛选""" keyword = response.meta.get('keyword') is_empty = response.xpath( '//div[@class="card card-no-result s-pt20b40"]') if is_empty: print('当前页面搜索结果为空') else: # 解析当前页面 for weibo in self.parse_weibo(response): self.check_environment() yield weibo next_url = response.xpath( '//a[@class="next"]/@href').extract_first()...
请问怎么降低速度呢？cookie 已经是最新的了。