google-images-download
Unfortunately all 20 could not be downloaded because some images were not downloadable
Hello,
does the framework work?
I just installed it on Ubuntu 18.04 via pip (inside a virtualenv), and whether I run it with or without the chromedriver I get the same message about the search filter. I tried different keywords and different limits (20 or 100), and in every case the output looks like this:
$ googleimagesdownload --keywords "car" --limit 20 --chromedriver "/usr/local/bin/chromedriver"
Item no.: 1 --> Item name = car Evaluating... Starting Download...
Unfortunately all 20 could not be downloaded because some images were not downloadable. 0 is all we got for this search filter!
Errors: 0
Everything downloaded! Total errors: 0 Total time taken: 1.003023386001587 Seconds
And the created directories are empty.
I would be grateful for an answer or a possible reason why this happens. Best, Radek
Same, on both my Mac and Windows 10, with or without Chromedriver.
See #298
same issue with me as well.
Same here. I ran into this problem about two weeks ago, so I replaced it with a different approach. For example, the code below is working for me.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import json
import os
import sys  # needed for sys.exit() below
import argparse
import requests
import urllib
import urllib3
from urllib3.exceptions import InsecureRequestWarning
import datetime
import time

urllib3.disable_warnings(InsecureRequestWarning)

searchword1 = 'cat'
searchword2 = 'dog'
searchword3 = 'cartoon'
searchurl = 'https://www.google.com/search?q=' + searchword1 + '+' + searchword2 + '+' + searchword3 + '&source=lnms&tbm=isch'
dirs = 'pictures'
maxcount = 1000
chromedriver = 'C:/Program Files/chromedriver/chromedriver.exe'

if not os.path.exists(dirs):
    os.mkdir(dirs)


def download_google_staticimages():
    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    # options.add_argument('--headless')
    try:
        browser = webdriver.Chrome(chromedriver, options=options)
    except Exception as e:
        print('Chromedriver was not found in this environment.')
        print(f'Install it on your machine. Exception: {e}')
        sys.exit()

    browser.set_window_size(1280, 1024)
    browser.get(searchurl)
    time.sleep(1)

    print('Getting you a lot of images. This may take a few moments...')

    element = browser.find_element_by_tag_name('body')

    # Scroll down
    for i in range(50):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)

    try:
        browser.find_element_by_id('smb').click()
        for i in range(50):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)
    except:
        for i in range(10):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

    print('Reached end of page.')
    time.sleep(0.5)
    print('Retry')
    time.sleep(0.5)

    # The value below is the Japanese "Show more results" button label. Change this text to your language if required.
    browser.find_element_by_xpath('//input[@value="結果をもっと表示"]').click()

    # Scroll down again
    for i in range(50):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)

    try:
        browser.find_element_by_id('smb').click()
        for i in range(50):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)
    except:
        for i in range(10):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

    # elements = browser.find_elements_by_xpath('//div[@id="islrg"]')
    # page_source = elements[0].get_attribute('innerHTML')
    page_source = browser.page_source

    soup = BeautifulSoup(page_source, 'lxml')
    images = soup.find_all('img')

    urls = []
    for image in images:
        try:
            url = image['data-src']
            if url.startswith('https://'):
                urls.append(url)
        except:
            try:
                url = image['src']
                if url.startswith('https://'):
                    urls.append(image['src'])
            except Exception as e:
                print('No image sources found.')
                print(e)

    count = 0
    if urls:
        for url in urls:
            try:
                res = requests.get(url, verify=False, stream=True)
                rawdata = res.raw.read()
                with open(os.path.join(dirs, 'img_' + str(count) + '.jpg'), 'wb') as f:
                    f.write(rawdata)
                count += 1
            except Exception as e:
                print('Failed to write rawdata.')
                print(e)

    browser.close()
    return count


# Main block
def main():
    t0 = time.time()
    count = download_google_staticimages()
    t1 = time.time()
    total_time = t1 - t0
    print('\n')
    print(f'Download completed. [Successful count = {count}].')
    print(f'Total time is {total_time} seconds.')


if __name__ == '__main__':
    main()
Same issue
The code above works, but it only downloads thumbnails :'(
I'm having the same issue too. Just a month ago, I was successfully using google_images_download to get Google Images for each Wikipedia article here (https://www.youtube.com/watch?v=PranVQik0yg). Unfortunately, while trying to download images for the sequel now, I only get 0 images downloaded. Perhaps 5% of the time, it actually downloads images, but that isn't reliable enough. This is what I normally see:
Item no.: 1 --> Item name = dog
Evaluating...
Starting Download...
Unfortunately all 100 could not be downloaded because some images were not downloadable. 0 is all we got for this search filter!
Errors: 0
({'dog': []}, 0)
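For reference, that last line, ({'dog': []}, 0), is the return value of response.download() in the version that produced the output above: a tuple of (paths per keyword, error count). A minimal sketch of checking it programmatically, assuming that same interface:

from google_images_download import google_images_download

response = google_images_download.googleimagesdownload()
# download() returns (paths, errors): paths maps each keyword to the list of saved file paths
paths, errors = response.download({"keywords": "dog", "limit": 100})
if not paths["dog"]:
    print("Nothing was downloaded for 'dog'")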
Same issue
I think Google might have changed something. It's rather weird, since I also tried some other scrapers and they all failed like this one.
Same issue
@rares-preda, @arthursdays, @Interesting6 and all, I could not get the google/bing scraper to work correctly, so I created a simple Flickr image scraper instead which works well. You can use it here: https://github.com/ultralytics/flickr_scraper
Enjoy!
This is what I do:

keyword = "fire hydrant"
# creating the dict of arguments
arguments = {"keywords": keyword,
             "limit": 40,
             "print_urls": False,
             "silent_mode": True,
             # "usage_rights": "labeled-for-reuse",
             "output_directory": str(data_path)}

# passing the arguments to the function
for item in range(100):
    paths = response.download(arguments)
    if paths[1] > 0:
        break
    print("got {} images on attempt number {}".format(len(paths[0][keyword]), item + 1))
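Note that the snippet above assumes response and data_path are already defined elsewhere. A minimal sketch of that setup, assuming the package's standard instantiation (the output directory name is only an illustration, not part of the original snippet):

from pathlib import Path
from google_images_download import google_images_download

data_path = Path("images")  # illustrative output directory
response = google_images_download.googleimagesdownload()  # downloader object used by response.download(arguments)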
Same issue
same issue here
@cli0 @Jiho-korea @utterworks I've updated the Bing scraper with a few improvements in the repo below. Pass a --chromedriver path for all searches, and optionally --download.
https://github.com/ultralytics/google-images-download
$ git clone https://github.com/ultralytics/google-images-download
$ cd google-images-download
$ python3 bing_scraper.py --search 'honeybees on flowers' --limit 10 --download --chromedriver /Users/glennjocher/Downloads/chromedriver
Searching for https://www.bing.com/images/search?q=honeybees%20on%20flowers
Downloading HTML... 3499588 elements: 30it [00:24, 1.21it/s]
Downloading images...
1/10 https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Apis_mellifera_Western_honey_bee.jpg/1200px-Apis_mellifera_Western_honey_bee.jpg
2/10 https://berkshirefarmsapiary.files.wordpress.com/2013/07/imgp8415.jpg
3/10 http://www.pestworld.org/media/561900/honey-bee-foraging-side-view.jpg
4/10 https://www.gannett-cdn.com/-mm-/da6df33e2de11997d965f4d81915ba4d1bd4586e/c=0-248-3131-2017/local/-/media/2017/06/22/USATODAY/USATODAY/636337466517310122-GettyImages-610156450.jpg
5/10 http://4.bp.blogspot.com/-b9pA6loDnsw/TY0GjKtyDCI/AAAAAAAAAD8/jHdZ5O40CeQ/s1600/bees.jpg
6/10 https://d3i6fh83elv35t.cloudfront.net/static/2019/02/Honey_bee_Apis_mellifera_CharlesJSharpCC-1024x683.jpg
7/10 http://www.fnal.gov/pub/today/images05/bee.jpg
8/10 https://upload.wikimedia.org/wikipedia/commons/5/55/Honey_Bee_on_Willow_Catkin_(5419305106).jpg
9/10 https://cdnimg.in/wp-content/uploads/2015/06/HoneyBeeW-1024x1024.jpg
10/10 http://www.pouted.com/wp-content/uploads/2015/03/honeybee_06_by_wings_of_light-d3fhfg1.jpg
Done with 0 errors in 37.1s. All images saved to /Users/glennjocher/PycharmProjects/google-images-download/images

Hi guys, I just figured out a workaround that lets me download original images from Google again. I forked another repo by WuLC and modified his download_with_selenium.py to make it work again. I have also submitted a pull request, and you can find the working version here. Note that I only changed the file download_with_selenium.py, and it is the only one that works for me. Hope this helps!
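This is not that fork's actual code, but for anyone curious, the general pattern such Selenium workarounds use is to click each thumbnail and read the src of the enlarged preview, which is the original image URL rather than a Base64 thumbnail. A rough sketch of that idea; the CSS class names below are assumptions (circa 2020) and will almost certainly need updating as Google changes its markup:

import time
from selenium import webdriver

def fetch_fullres_urls(query, limit=20, chromedriver='/path/to/chromedriver'):  # placeholder driver path
    browser = webdriver.Chrome(chromedriver)
    browser.get('https://www.google.com/search?q=' + query + '&tbm=isch')
    urls = []
    # 'img.Q4LuWd' (grid thumbnails) and 'img.n3VNCb' (enlarged preview) are assumed class names;
    # inspect the page in DevTools and update them if they no longer match
    for thumbnail in browser.find_elements_by_css_selector('img.Q4LuWd')[:limit]:
        try:
            thumbnail.click()
            time.sleep(1)  # give the preview pane time to swap in the full-resolution src
            for preview in browser.find_elements_by_css_selector('img.n3VNCb'):
                src = preview.get_attribute('src')
                if src and src.startswith('http'):  # skip Base64 data: URIs
                    urls.append(src)
                    break
        except Exception as e:
            print('Skipping one thumbnail:', e)
    browser.close()
    return urls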
glenn-jocher's approach of downloading from another website is verified working. To extend this contribution to Windows, I hereby elaborate the steps:

- Download the Chrome driver (go to https://chromedriver.chromium.org/downloads).
  If you are using Chrome version 80.0.3987.132 (click on Chrome > Help > About Google Chrome to find out),
  select this link for ChromeDriver 80.0.3987.106: https://chromedriver.storage.googleapis.com/index.html?path=80.0.3987.106/
  Save chromedriver_win32.zip to C:\Software and unzip it; the file should be chromedriver.exe.
- Install the Python package needed to run this script. Open a DOS command prompt:
  pip install tqdm
- Download bing_scraper.py using git:
  git clone https://github.com/ultralytics/google-images-download
- Run the Python script to download images:
  cd google-images-download
  python bing_scraper.py --search "honeybees on flowers" --limit 10 --download --chromedriver "C:\Software\chromedriver.exe"
  The images should be saved in a new directory honeybees_on_flowers, such as C:\Users\[user name]\google-images-download\images\honeybees_on_flowers
  python bing_scraper.py --search "blue tit" -s ">400*300" -f "jpg" --limit 100 --download --chromedriver "C:\Software\chromedriver.exe"
  The images should be saved in a new directory blue tit, such as C:\Users\[user name]\google-images-download\images\blue tit
This works but I don't think the -s argument is being parsed correctly. I ran 'oven roasted potatoes' with -s ">400*300" then again with -a "wide" then again with neither and got the same results each time.
Yes, this is effective for Google image downloads. If you use "search", most of the other arguments are not used. You can see from his code that it was actually forked from this package.
Thank you so much! I was able to get this downloading following jackeylhm's instructions for windows. You saved my capstone project :)
Good finding on the -s argument. Thanks.
@arthursdays I ran download_with_selenium.py and it shows a list of "Start downloading with" and "Finish downloading all images", but only the keyword directories are created and no Firefox window opens. Did I miss something?
@vader1359 Have you downloaded geckodriver and added it to the path? And is this the first time you've set up Firefox? If yes, restart your computer and try again.
@arthursdays thank you very much! I had been looking for a solution for days and yours was the one that finally worked! And also thank you @liuzaozao7799; if it wasn't for your comment I would still not be able to make this script work.
Just to add a hint: the script does not support reading search terms from a file and I was confused between -kf (key word file) and --search. So if you have search terms in a text file (searchwords.txt) you need to create a small batch to loop through all search terms.
For example, in Windows (search_searchwords.bat):

@echo off
FOR /F %%G IN (searchwords.txt) DO py bing_scraper.py --search %%G --limit 3 --download --chromedriver "C:\Users\ossi\Downloads\chromedriver_win32\chromedriver.exe"
I'm really new to Python, so I'm probably saying something stupid, but I could really use your help! When I use jackeylhm's method I get stuck at step 4. It generates the folders, but they stay empty. Any tips would be great! These are my errors:
Searching for https://www.bing.com/images/search?q=blue%20tit
Traceback (most recent call last):
File "bing_scraper.py", line 936, in
Found it out myself :D
pip install selenium
One note from my side:
If you are using bing_scraper.py with Python 3, you have to comment out or delete the line "import ssl", as it is now part of the standard Python library.
@glenn-jocher Thanks for the fix! Could you also give an example of how to use your modified code to do an automated search and download of multiple search words? When using a config file, it reverts to using the Google search engine.
And then I tried this kind of Python code and it didn't work (Please excuse my ignorance. I'm new to programming):
import bing_scraper
shrooms = ["Psilocybe cubensis", "Psilocybe semilanceata", "Psilocybe baeocystis", "Psilocybe mexicana", "Psilocybe cyanescens", "Psilocybe azurescens" ]
for x in shrooms: --search x --limit 100 --download --chromedriver /home/lam/ml/google-images-download/chromedriver
@lamcnguyen89 you can do bash for loop:
git clone https://github.com/ultralytics/google-images-download
cd google-images-download
for x in 'bees' 'flowers' 'bees and flowers'
do
python3 bing_scraper.py --search $x --limit 10 --download --chromedriver /Users/glennjocher/Downloads/chromedriver
done
Output is:
Searching for https://www.bing.com/images/search?q=bees
Downloading HTML... 3529574 elements: 100%|██████████████████████████████████████████████████████████████████████████| 30/30 [00:21<00:00, 1.42it/s]
Downloading images...
1/10 https://s.newsweek.com/sites/www.newsweek.com/files/styles/full/public/2018/09/25/bee-stock.jpg
2/10 https://d1o50x50snmhul.cloudfront.net/wp-content/uploads/2017/06/29180017/rexfeatures_7457873a.jpg
3/10 URLError on an image...trying next one... Error: HTTP Error 404: Not Found
3/10 https://www.readersdigest.ca/wp-content/uploads/sites/14/2011/02/bee-myths-making-honey.jpg
4/10 https://3c1703fe8d.site.internapcdn.net/newman/gfx/news/2017/howhoneybeeg.jpg
5/10 http://www.sciencebuzz.org/sites/default/files/images/bees.jpg
6/10 http://www.pestworld.org/media/561900/honey-bee-foraging-side-view.jpg
7/10 https://media.boingboing.net/wp-content/uploads/2018/09/bees.jpg
8/10 https://abcwildlife.com/wp-content/uploads/2016/08/Honey-Bee-1.jpg
9/10 https://static01.nyt.com/images/2016/02/10/science/10OBS-BEES/10OBS-BEES-superJumbo.jpg
10/10 https://bees4life.org/wp-content/uploads/2018/03/german.jpg
Done with 1 errors in 34.9s. All images saved to /Users/glennjocher/PycharmProjects/google-images-download/images
Searching for https://www.bing.com/images/search?q=flowers
Downloading HTML... 3911650 elements: 100%|██████████████████████████████████████████████████████████████████████████| 30/30 [00:20<00:00, 1.44it/s]
Downloading images...
1/10 https://upload.wikimedia.org/wikipedia/commons/thumb/9/9c/Cosmos_bipinnatus_pink%2C_Burdwan%2C_West_Bengal%2C_India_10_01_2013.jpg/1200px-Cosmos_bipinnatus_pink%2C_Burdwan%2C_West_Bengal%2C_India_10_01_2013.jpg
2/10 https://www.thespruce.com/thmb/SiO7HQpE-6kFDbwpTz6CKNGO1Rw=/2119x1415/filters:fill(auto,1)/GettyImages-971582964-ee0f28aa66b04fb1a54171fa4bdee7a6.jpg
3/10 https://i.ytimg.com/vi/ryUxrFUk6MY/maxresdefault.jpg
4/10 https://static.onecms.io/wp-content/uploads/sites/37/2019/01/18211412/gaillardia-arizona-sun-4f15dce1.jpg
5/10 https://www.gardeningknowhow.com/wp-content/uploads/2014/07/blanket-flowers.jpg
6/10 https://i.ytimg.com/vi/QGFaKLkocwQ/maxresdefault.jpg
7/10 http://3.bp.blogspot.com/-jBVNc3D-qr0/UAgQZ6ZTPuI/AAAAAAAADOE/8uodPPtx3X0/s1600/Beautiful_flowers_zastavki_com_18901_2.jpg
8/10 http://2.bp.blogspot.com/-RGCK63K8Y6s/UCkbAn3S4xI/AAAAAAAABa0/rv89TeHFzeY/s1600/Prison+flowers+8.jpg
9/10 https://1.bp.blogspot.com/-1l4T6MDQgqo/WSKvmmZ-KJI/AAAAAAAAAyM/xD2vAPs6JF8pLJl6lhnceuRdKWntNYovACLcB/s1600/9.jpg
10/10 https://i.ytimg.com/vi/aS-LUW5Jim0/maxresdefault.jpg
Done with 0 errors in 28.4s. All images saved to /Users/glennjocher/PycharmProjects/google-images-download/images
Searching for https://www.bing.com/images/search?q=bees%20and%20flowers
Downloading HTML... 3532461 elements: 100%|██████████████████████████████████████████████████████████████████████████| 30/30 [00:21<00:00, 1.41it/s]
Downloading images...
1/10 https://photomemorie.files.wordpress.com/2012/05/flower-and-bees1.jpg
2/10 URLError on an image...trying next one... Error: HTTP Error 404: Not Found
2/10 https://www.gildshire.com/wp-content/uploads/2017/04/Bees-and-Flowers.jpg
3/10 http://howfacecare.com/wp-content/uploads/2016/06/take_right_critical_steps_to_an_allergic_reaction_to_a_bee_sting.jpg
4/10 http://i2.cdn.turner.com/money/dam/assets/170210115106-pollinating-bee-780x439.jpg
5/10 https://www.mybeeline.co/media/cache/full/posts/bee-on-flower.jpg
6/10 http://www.bibliotecapleyades.net/imagenes_ciencia/bees63_01.jpg
7/10 https://berkshirefarmsapiary.files.wordpress.com/2013/07/imgp8415.jpg
8/10 http://www.birdsandblooms.com/wp-content/uploads/2015/01/Flowers_That_Attract_Bees_Calendula.jpg
9/10 https://img-aws.ehowcdn.com/877x500p/s3-us-west-1.amazonaws.com/contentlab.studiod/getty/a898f65f7f134c6492d8a4ed24eadb3e Invalid or missing image format. Skipping...
9/10 https://berksandschuylkillbeekeepers.org/wp-content/uploads/circle-image-slider-with-lightbox/bees_on_flower.jpg
10/10 http://jonlieffmd.com/wp-content/uploads/2012/11/Bee-and-flower-1.jpg
Done with 2 errors in 38.8s. All images saved to /Users/glennjocher/PycharmProjects/google-images-download/images
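If you'd rather drive bing_scraper.py from Python than from bash (as in the earlier question about looping over a list of search words), here is a minimal sketch using subprocess with the same CLI flags shown above; the chromedriver path is only a placeholder:

import subprocess

keywords = ['bees', 'flowers', 'bees and flowers']  # any list of search terms
for keyword in keywords:
    # invoke bing_scraper.py once per search term, same flags as the bash loop above
    subprocess.run(['python3', 'bing_scraper.py',
                    '--search', keyword,
                    '--limit', '10',
                    '--download',
                    '--chromedriver', '/path/to/chromedriver'],  # placeholder path
                   check=True)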