course22
course22 copied to clipboard
BUG: lesson1 notebook ddg image fetch returns 403
trafficstars
-
Bug:
`urls = search_images('bird photos', max_images=1)` returns `HTTP403ForbiddenError: HTTP Error 403: Forbidden`
The following is a quick fix (haven't checked it thoroughly):
from fastcore.all import *
import time
def search_images(term, max_images=200):
    """Search DuckDuckGo Images for `term` and return up to `max_images` unique image URLs.

    The DuckDuckGo landing page is fetched first to obtain the ``vqd`` token,
    which is then passed to the ``i.js`` endpoint. Result pages are followed
    through the ``next`` field of each JSON response until enough URLs have
    been collected or no further page is advertised.

    Args:
        term: The search query.
        max_images: Maximum number of image URLs to return.

    Returns:
        A fastcore ``L`` holding at most `max_images` distinct image URLs.
        The URLs are gathered in a set, so their order is not meaningful.

    Raises:
        RuntimeError: If no ``vqd`` token can be found in the landing page.
        requests.HTTPError: If the image endpoint answers with an error status.
    """
    # Function-scope imports keep the snippet self-contained: only
    # `fastcore.all` and `time` are imported at the top of the file.
    import json
    import re

    import requests

    url = 'https://duckduckgo.com/'
    res = urlread(url, data={'q': term})
    searchObj = re.search(r'vqd=([\d-]+)\&', res)
    if searchObj is None:
        # Fail with a clear message instead of an AttributeError on `.group`.
        raise RuntimeError(
            f'Could not find the vqd token in the DuckDuckGo response for {term!r}'
        )

    requestUrl = url + 'i.js'
    # Browser-like headers; this is the issue's proposed workaround for the
    # HTTP 403 that the bare request receives.
    headers = {
        'dnt': '1',
        'accept-encoding': 'gzip, deflate, sdch, br',
        'x-requested-with': 'XMLHttpRequest',
        'accept-language': 'en-GB,en-US;q=0.8,en;q=0.6,ms;q=0.4',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'referer': 'https://duckduckgo.com/',
        'authority': 'duckduckgo.com',
    }
    params = (
        ('l', 'wt-wt'),
        ('o', 'json'),
        ('q', term),
        ('vqd', searchObj.group(1)),
        ('f', ',,,'),
        ('p', '2'),
    )

    urls = set()
    while len(urls) < max_images:
        res = requests.get(requestUrl, headers=headers, params=params)
        # Surface HTTP failures (e.g. 403) directly rather than as a JSON
        # decoding error on an HTML error page.
        res.raise_for_status()
        data = json.loads(res.text)
        urls.update(L(data.get('results', [])).itemgot('image'))

        # The last page carries no 'next' entry; stop instead of raising KeyError.
        next_path = data.get('next')
        if not next_path:
            break
        requestUrl = url + next_path
        time.sleep(0.2)  # brief pause between successive page requests
    return L(urls)[:max_images]
- And this is the error:
