Web Crawler
Something is wrong with my typhoon script
I copied out by hand a script that fetches historical typhoon data, and every time I run it the same error occurs.
Could you help me figure out where it goes wrong?
-*- mode: compilation; default-directory: "~/spider/spider/spiders/" -*-
Compilation started at Thu Mar 4 14:19:50
python3 typhoon.py
Traceback (most recent call last):
File "typhoon.py", line 114, in <module>
tfcraw.get_tf_detail()
File "typhoon.py", line 62, in get_tf_detail
tf_list = self.get_tf_list()
File "typhoon.py", line 44, in get_tf_list
year_list = self.get_year()
File "typhoon.py", line 34, in get_year
years = r.json()
File "/home/steiner/.local/lib/python3.6/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/usr/lib/python3/dist-packages/simplejson/__init__.py", line 518, in loads
return _default_decoder.decode(s)
File "/usr/lib/python3/dist-packages/simplejson/decoder.py", line 370, in decode
obj, end = self.raw_decode(s)
File "/usr/lib/python3/dist-packages/simplejson/decoder.py", line 400, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
simplejson.errors.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Compilation exited abnormally with code 1 at Thu Mar 4 14:19:51
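From the traceback, r.json() fails inside get_year with "Expecting value: line 1 column 1 (char 0)", which means the response body handed to the JSON parser does not start with JSON at all, typically an empty body or an HTML error/anti-bot page rather than the expected data. A minimal probe to see what the server actually returns, reusing the URL pattern, Referer, and one of the User-Agent strings from the script below (the Cookie header is left out here for brevity, which may itself change the server's answer):

import requests

# My assumption: same endpoint and header shape as in the script below.
base_url = 'http://www.wztf121.com/data/complex/{}.json'
headers = {
    'Host': 'www.wztf121.com',
    'Referer': 'http://www.wztf121.com/history.html',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
}

r = requests.get(base_url.format('years'), headers=headers)
print(r.status_code)                  # anything other than 200 is already a clue
print(r.headers.get('Content-Type'))  # real data should be application/json
print(r.text[:300])                   # the first characters json() tried to parse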
Here is the code:
import requests
from pymongo import MongoClient
import time
import random


class Typhoon:
    def __init__(self):
        # Pool of desktop User-Agent strings; one is picked at random per run.
        self.user_agent = [
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
            "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        ]
        self.base_url = 'http://www.wztf121.com/data/complex/{}.json'
        self.headers = {
            'Cookie': '_gscu_1378142123=65572018r5on4x80; _gscbrs_1378142123=1; vjuids=30469f88b.16c835d32ea.0.8062809782e9b; vjlast=1565572019.1565572019.30; Hm_lvt_e592d6befa4f9918e6496980d22c5649=1565572019; Wa_lvt_1=1565572019; Wa_lpvt_1=1565576034; _gscs_1378142123=65572018v2ofkf80|pv:8; Hm_lpvt_e592d6befa4f9918e6496980d22c5649=1565576061',
            'Host': 'www.wztf121.com',
            'Referer': 'http://www.wztf121.com/history.html',
            'User-Agent': random.choice(self.user_agent)
        }
        self.client = MongoClient()
        self.db = self.client.typhoon

    def get_year(self):
        """Fetch the list of years that have typhoon records."""
        year_list = []
        years_url = self.base_url.format('years')
        r = requests.get(years_url, headers=self.headers)
        years = r.json()
        for year in years:
            year_list.append(year['year'])
        print('Fetched the years of all typhoon records')
        return year_list

    def get_tf_list(self):
        """Fetch the typhoon IDs (tfbh) for every year."""
        tf_list = []
        year_list = self.get_year()
        for year in year_list:
            url = self.base_url.format(year)
            r = requests.get(url, headers=self.headers)
            tfs = r.json()
            for tf in tfs:
                tfbh = tf['tfbh']
                tf_list.append(tfbh)
            time.sleep(random.random())
        print('Fetched all typhoon IDs (format: year + sequence number)')
        return tf_list

    def get_tf_detail(self):
        """Fetch each typhoon's track points and store them in MongoDB."""
        tf_list = self.get_tf_list()
        count = 1
        for tf in tf_list:
            tf_url = self.base_url.format(tf)
            r = requests.get(tf_url, headers=self.headers)
            tf_detail = r.json()
            begin_time = tf_detail[0]['begin_time']
            ename = tf_detail[0]['ename']
            end_time = tf_detail[0]['end_time']
            name = tf_detail[0]['name']
            points = tf_detail[0]['points']
            for point in points:
                latitude = point['latitude']
                longitude = point['longitude']
                power = point['power']
                speed = point['speed']
                pressure = point['pressure']
                strong = point['strong']
                real_time = point['time']
                detail = {
                    'name': name,
                    'ename': ename,
                    'latitude': latitude,
                    'longitude': longitude,
                    'power': power,
                    'speed': speed,
                    'pressure': pressure,
                    'strong': strong,
                    'time': real_time,
                }
                self.db['detail'].insert_one(detail)
            time.sleep(5 * random.random())
            tf_info = {
                'name': name,
                'ename': ename,
                'begin_time': begin_time,
                'end_time': end_time,
            }
            self.db['info'].insert_one(tf_info)
            print('Saved detailed info for typhoon No. {}!'.format(count))
            count += 1


tfcraw = Typhoon()
tfcraw.get_tf_detail()
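Not a root-cause fix, but the failure would be much easier to read if every r.json() call went through a small guard. A sketch, where fetch_json is a hypothetical helper of my own, not part of the original script or the site's API:

import requests


def fetch_json(url, headers):
    # Fetch a URL and parse JSON, failing loudly with the offending body
    # instead of letting JSONDecodeError surface from deep inside requests.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()  # turn 4xx/5xx into an explicit error
    try:
        return r.json()
    except ValueError:  # json/simplejson decode errors are subclasses of ValueError
        raise RuntimeError('Non-JSON response from {} (status {}): {!r}'.format(
            url, r.status_code, r.text[:200]))

Each "r = requests.get(...); x = r.json()" pair in the class could then become "x = fetch_json(url, self.headers)", so the error message shows which URL failed and what the server actually sent back.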