Reptile icon indicating copy to clipboard operation
Reptile copied to clipboard

台风脚本那里有点问题

Open nesteiner opened this issue 3 years ago • 0 comments

我手抄了一下台风历史信息的脚本,运行的时候发现总有一个错误发生
麻烦你帮我看一下哪里出错了

-*- mode: compilation; default-directory: "~/spider/spider/spiders/" -*-
Compilation started at Thu Mar  4 14:19:50

python3 typhoon.py
Traceback (most recent call last):
  File "typhoon.py", line 114, in <module>
    tfcraw.get_tf_detail()
  File "typhoon.py", line 62, in get_tf_detail
    tf_list = self.get_tf_list()
  File "typhoon.py", line 44, in get_tf_list
    year_list = self.get_year()
  File "typhoon.py", line 34, in get_year
    years = r.json()
  File "/home/steiner/.local/lib/python3.6/site-packages/requests/models.py", line 897, in json
    return complexjson.loads(self.text, **kwargs)
  File "/usr/lib/python3/dist-packages/simplejson/__init__.py", line 518, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python3/dist-packages/simplejson/decoder.py", line 370, in decode
    obj, end = self.raw_decode(s)
  File "/usr/lib/python3/dist-packages/simplejson/decoder.py", line 400, in raw_decode
    return self.scan_once(s, idx=_w(s, idx).end())
simplejson.errors.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Compilation exited abnormally with code 1 at Thu Mar  4 14:19:51

代码在这

import requests
from pymongo import MongoClient
import time
import random

class Typhoon:
    def __init__(self):
        self.user_agent = [
                           "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                           "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                           "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
                           "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
                           "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    ]

        self.base_url = 'http://www.wztf121.com/data/complex/{}.json'
        self.headers = {
            'Cookie': '_gscu_1378142123=65572018r5on4x80; _gscbrs_1378142123=1; vjuids=30469f88b.16c835d32ea.0.8062809782e9b; vjlast=1565572019.1565572019.30; Hm_lvt_e592d6befa4f9918e6496980d22c5649=1565572019; Wa_lvt_1=1565572019; Wa_lpvt_1=1565576034; _gscs_1378142123=65572018v2ofkf80|pv:8; Hm_lpvt_e592d6befa4f9918e6496980d22c5649=1565576061',
            'Host': 'www.wztf121.com',
            'Referer': 'http://www.wztf121.com/history.html',
            'User-Agent': random.choice(self.user_agent)
        }

        self.client = MongoClient()
        self.db     = self.client.typhoon



    def get_year(self):
        year_list = []
        years_url = self.base_url.format('years')

        r = requests.get(years_url, headers = self.headers)
        years = r.json()

        for year in years:
            year_list.append(year['year'])

        print('以获取所有台风记录的年份')
        return year_list

    def get_tf_list(self):
        tf_list = []
        year_list = self.get_year()

        for year in year_list:
            url = self.base_url.format(year)

            r = requests.get(url, headers = self.headers)
            tfs = r.json()

            for tf in tfs:
                tfbh = tf['tfbh']
                tf_list.append(tfbh)

            time.sleep(random.random())

        print('已获得所有台风的编号,格式为 年份 + 次序')
        return tf_list

    def get_tf_detail(self):
        tf_list = self.get_tf_list()
        count = 1
        for tf in tf_list:
            tf_url = self.base_url.format(tf)
            r = requests.get(tf_url, headers = self.headers)
            tf_detail = r.json()

            begin_time = tf_detail[0]['begin_time']
            ename      = tf_detail[0]['ename']
            end_time   = tf_detail[0]['end_time']
            name       = tf_detail[0]['name']
            points     = tf_detail[0]['points']

            for point in points:
                latitude  = point['latitude']
                longitude = point['longitude']
                power     = point['power']
                speed     = point['speed']
                pressure  = point['pressure']
                strong    = point['strong']
                real_time = point['time']

                detail = {
                    'name': name,
                    'ename': ename,
                    'latitude': latitude,
                    'longitude': longitude,
                    'power': power,
                    'speed': speed,
                    'pressure': pressure,
                    'strong': strong,
                    'time': real_time,
                }
                self.db['detail'].insert_one(detail)


            time.sleep(5 * random.random())
            tf_info = {
                'name': name,
                'ename': ename,
                'begin_time': begin_time,
                'end_time': end_time,
            }

            self.db['info'].insert_one(tf_info)
            print('已存入第{}条台风详细信息!'.format(count))
            count += 1

                
            
        
tfcraw = Typhoon()
tfcraw.get_tf_detail()

nesteiner avatar Mar 04 '21 06:03 nesteiner