GoogleScraper
GoogleScraper copied to clipboard
(Http|Selenium) Only retrieving first result links
I'm trying to get the links for the simple keyword "apple" from the Google search engine, but it's returning only 4 links (i.e. only the links of the first result).
Below is the code:
import sys
from pyvirtualdisplay import Display
from GoogleScraper import scrape_with_config, GoogleSearchError
from GoogleScraper.database import ScraperSearch, SERP, Link
def search():
    """Scrape Google for the keyword 'apple' and print every link found.

    Starts a virtual display, runs GoogleScraper with a fixed configuration
    (one page, ten results per page, 'http' scrape method, output written to
    'urls.json'), then prints each link of each SERP stored in the returned
    session.

    If scraping fails with GoogleSearchError, the error is printed and the
    function returns without touching the (non-existent) session. The
    virtual display is always stopped on exit.
    """
    # to make selenium work headless
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        config = {
            'SCRAPING': {
                'use_own_ip': 'True',
                'keyword': 'apple',
                'search_engines': 'google',
                'num_results_per_page': 10,
                'num_pages_for_keyword': 1,
                'scrape_method': 'http',
            },
            'GLOBAL': {
                'clean_cache_files': True,
                'debug': 50,
                'verbosity': 5,
                'do_caching': True,
            },
            'OUTPUT': {
                'output_filename': 'urls.json',
            },
        }

        try:
            session = scrape_with_config(config)
        except GoogleSearchError as e:
            print(e)
            # Without a session there is nothing to inspect; returning here
            # avoids a NameError on the unbound `session` below.
            return

        # let's inspect what we got
        # (loop variable is not called `search` so it does not shadow this
        # function's own name)
        for scraper_search in session.query(ScraperSearch).all():
            for serp in scraper_search.serps:
                for link in serp.links:
                    print(link)
    finally:
        # Always shut the virtual display down, even on error or early return.
        display.stop()
# Run the scrape only when this file is executed as a script.
if __name__ == '__main__':
    search()
Output
"effective_query": "0",
"id": "1",
"no_results": "False",
"num_results": "4",
"num_results_for_query": "About 1,49,00,00,000 results (0.54 seconds)\u00a0",
"page_number": "1",
"query": "apple",
"requested_at": "2015-08-04 09:48:15.449949",
"requested_by": "localhost",
"results": [
{
"domain": "www.apple.com",
"id": "1",
"link": "https://www.apple.com/in/",
"link_type": "results",
"rank": "1",
"serp_id": "1",
"snippet": "Apple designs and creates the iPhone, iPad, Mac notebooks and desktop computers, iOS 8, OS X, iPod and iTunes, and the new Apple Watch.",
"title": "Apple (India)",
"visible_link": "https://www.apple.com/in/"
},
{
"domain": "www.businessinsider.com",
"id": "2",
"link": "http://www.businessinsider.com/apple-in-talks-to-launch-an-mvno-in-the-us-and-europe-2015-8",
"link_type": "results",
"rank": "1",
"serp_id": "1",
"snippet": "Apple wants customers to pay it for data, calls and texts, and then switch between carriers for\u00a0...",
"title": "Apple is in talks to launch its own virtual network service in the US and Europe",
"visible_link": "Business Insider"
},
{
"domain": "www.wired.com",
"id": "3",
"link": "http://www.wired.com/2015/08/timepiece-wears-apple-watch-dont/",
"link_type": "results",
"rank": "2",
"serp_id": "1",
"snippet": "None",
"title": "This Timepiece Wears an Apple Watch So You Don't Have to | WIRED",
"visible_link": "Wired"
},
{
"domain": "www.marketwatch.com",
"id": "4",
"link": "http://www.marketwatch.com/story/apples-stock-officially-enters-correction-territory-as-it-crosses-below-key-technical-level-2015-08-03",
"link_type": "results",
"rank": "3",
"serp_id": "1",
"snippet": "None",
"title": "Apple's stock officially enters correction territory as it crosses below key technical level",
"visible_link": "MarketWatch"
}
],
"scrape_method": "http",
"search_engine_name": "google",
"status": "successful"
}]
Am I doing something wrong? Please help.
most likely a duplicate of #119