GoogleScraper icon indicating copy to clipboard operation
GoogleScraper copied to clipboard

(Http|Selenium) Only retrieving first result links

Open amar-myana opened this issue 10 years ago • 1 comment

I'm trying to get the links for the simple keyword "apple" from the Google search engine, but it's returning only 4 links (i.e. only the first few result links).

Below is the code:

import sys
from pyvirtualdisplay import Display
from GoogleScraper import scrape_with_config, GoogleSearchError
from GoogleScraper.database import ScraperSearch, SERP, Link

def search():
    """Scrape Google for the keyword 'apple' and print every link found.

    Starts a virtual display, runs GoogleScraper with an inline
    configuration, then prints each link of each SERP stored in the
    session returned by ``scrape_with_config``.

    If the scrape raises ``GoogleSearchError`` the error is printed and the
    function returns without inspecting any results. The virtual display is
    stopped on every exit path.
    """
    # Virtual display so that selenium can work headless.
    display = Display(visible=0, size=(800, 600))
    display.start()

    try:
        config = {
            'SCRAPING': {
                'use_own_ip': 'True',
                'keyword': 'apple',
                'search_engines': 'google',
                'num_results_per_page': 10,
                'num_pages_for_keyword': 1,
                'scrape_method': 'http',
            },
            'GLOBAL': {
                'clean_cache_files': True,
                'debug': 50,
                'verbosity': 5,
                'do_caching': True,
            },
            'OUTPUT': {
                'output_filename': 'urls.json',
            },
        }

        try:
            session = scrape_with_config(config)
        except GoogleSearchError as e:
            print(e)
            # Without a session there is nothing to inspect; falling through
            # would hit an unbound ``session`` and raise NameError.
            return

        # Let's inspect what we got. The loop variable is deliberately not
        # called ``search`` so it does not shadow this function's name.
        for scraper_search in session.query(ScraperSearch).all():
            for serp in scraper_search.serps:
                for link in serp.links:
                    print(link)
    finally:
        # Release the virtual display started above, even on error.
        display.stop()

# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    search()

Output

 "effective_query": "0",
  "id": "1",
  "no_results": "False",
  "num_results": "4",
  "num_results_for_query": "About 1,49,00,00,000 results (0.54 seconds)\u00a0",
  "page_number": "1",
  "query": "apple",
  "requested_at": "2015-08-04 09:48:15.449949",
  "requested_by": "localhost",
  "results": [
    {
      "domain": "www.apple.com",
      "id": "1",
      "link": "https://www.apple.com/in/",
      "link_type": "results",
      "rank": "1",
      "serp_id": "1",
      "snippet": "Apple designs and creates the iPhone, iPad, Mac notebooks and desktop computers, iOS 8, OS X, iPod and iTunes, and the new Apple Watch.",
      "title": "Apple (India)",
      "visible_link": "https://www.apple.com/in/"
    },
    {
      "domain": "www.businessinsider.com",
      "id": "2",
      "link": "http://www.businessinsider.com/apple-in-talks-to-launch-an-mvno-in-the-us-and-europe-2015-8",
      "link_type": "results",
      "rank": "1",
      "serp_id": "1",
      "snippet": "Apple wants customers to pay it for data, calls and texts, and then switch between carriers for\u00a0...",
      "title": "Apple is in talks to launch its own virtual network service in the US and Europe",
      "visible_link": "Business Insider"
    },
    {
      "domain": "www.wired.com",
      "id": "3",
      "link": "http://www.wired.com/2015/08/timepiece-wears-apple-watch-dont/",
      "link_type": "results",
      "rank": "2",
      "serp_id": "1",
      "snippet": "None",
      "title": "This Timepiece Wears an Apple Watch So You Don't Have to | WIRED",
      "visible_link": "Wired"
    },
    {
      "domain": "www.marketwatch.com",
      "id": "4",
      "link": "http://www.marketwatch.com/story/apples-stock-officially-enters-correction-territory-as-it-crosses-below-key-technical-level-2015-08-03",
      "link_type": "results",
      "rank": "3",
      "serp_id": "1",
      "snippet": "None",
      "title": "Apple's stock officially enters correction territory as it crosses below key technical level",
      "visible_link": "MarketWatch"
    }
  ],
  "scrape_method": "http",
  "search_engine_name": "google",
  "status": "successful"
}]

Am I doing something wrong? Please help.

amar-myana avatar Aug 04 '15 09:08 amar-myana

most likely a duplicate of #119

telethonic avatar Aug 08 '15 19:08 telethonic