node-scraper icon indicating copy to clipboard operation
node-scraper copied to clipboard

Uncatchable errors

Open akidee opened this issue 14 years ago • 1 comment

When scraping google.de, I get:

Error: Invalid character: Invalid character in tag name: ){ at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) at Object.innerHTML (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/index.js:420:27) at Function.clean (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:18:317) at Function.buildFragment (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:17:31854) at [object Object].init (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:16:7963) Error: Invalid character: Invalid character in tag name: ) at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) at Object.innerHTML 
(/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/index.js:420:27) at Function.clean (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:18:317) at Function.buildFragment (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:17:31854) Error: Invalid character: Invalid character in tag name: ) at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) Error: Invalid character: Invalid character in tag name: ; at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild 
(/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) ...

Admittedly, real-world HTML is often invalid, but clients expect a parser to tolerate syntax errors. The scraping and analysis seem to work, but the errors cannot be suppressed.

Source:

  // Minimal reproduction: request google.de through `scraper` and print the
  // text content of every element matched by the 'body' selector.
  var scraper;
  scraper = require('scraper');
  try {
    scraper({
      uri: 'http://google.de/',
      headers: {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
      }
    }, function(e, $) {
      if (e) {
        // Rethrow the error passed to the callback. The previous code threw
        // the undeclared identifier `err`, which raised a ReferenceError and
        // hid the real failure.
        throw e;
      }
      return $('body').each(function() {
        return console.log($(this).text() + '\n\n');
      });
    });
  } catch (e) {
    // NOTE(review): this handler only sees exceptions thrown synchronously by
    // the scraper() call itself. If the callback runs later (presumably after
    // the HTTP response arrives; confirm against scraper's implementation),
    // errors thrown inside it will not reach this catch.
    console.log('ERROR');
  }

akidee avatar Oct 30 '11 19:10 akidee

I have the same problem. I'm using Node v0.4.12 on Ubuntu 11.10.

jprichardson avatar Nov 04 '11 12:11 jprichardson