kindle-open-books icon indicating copy to clipboard operation
kindle-open-books copied to clipboard

抓取soup乱码 (the soup returned by index_to_soup prints as garbled text)

Open take7yo opened this issue 8 years ago • 0 comments

from calibre.web.feeds.recipes import BasicNewsRecipe

class Python_Tutorial(BasicNewsRecipe):
    """Recipe that gathers the runoob.com Python tutorial pages into one feed.

    The article list is read from the navigation column (``div#leftcolumn``)
    of the tutorial's index page.
    """

    title = 'Python Tutorial'
    description = ''
    cover_url = 'http://www.runoob.com/wp-content/uploads/2013/11/python.jpg'

    # Site root; hrefs taken from the index page are appended to it.
    url_prefix = 'http://www.runoob.com'
    no_stylesheets = True
    # Reporter's note: setting the encoding did not fix the garbled output either.
    encoding = 'utf-8'
    keep_only_tags = [{'class': 'article-intro'}]

    def get_title(self, link):
        """Return the first child of *link* with surrounding whitespace stripped."""
        return link.contents[0].strip()

    def parse_index(self):
        """Build the article list from the tutorial's navigation column.

        Returns:
            A one-element list holding a ``('Python_Tutorial', articles)``
            tuple; each article is a dict with ``'title'`` and ``'url'`` keys.

        Raises:
            ValueError: if the index page has no ``div`` with id ``leftcolumn``.
        """
        index_url = self.url_prefix + '/python/python-tutorial.html'
        soup = self.index_to_soup(index_url)
        # Reporter's note: printing ``soup`` at this point shows garbled text.
        div = soup.find('div', {'id': 'leftcolumn'})
        if div is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError('no div#leftcolumn found in %s' % index_url)

        articles = []
        for link in div.findAll('a'):
            href = link.get('href')
            # Skip anchors with no target, in-page fragments, and links
            # that do not point into the /python section.
            if not href or '#' in href or '/python' not in href:
                continue

            articles.append({
                'title': self.get_title(link),
                'url': self.url_prefix + href,
            })

        return [('Python_Tutorial', articles)]

take7yo avatar Dec 11 '16 06:12 take7yo