node-scraper
node-scraper copied to clipboard
Uncatchable errors
When scraping google.de, I get:
Error: Invalid character: Invalid character in tag name: ){ at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) at Object.innerHTML (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/index.js:420:27) at Function.clean (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:18:317) at Function.buildFragment (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:17:31854) at [object Object].init (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:16:7963) Error: Invalid character: Invalid character in tag name: ) at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) at Object.innerHTML 
(/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/index.js:420:27) at Function.clean (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:18:317) at Function.buildFragment (/Users/andi/node_modules/scraper/deps/jquery-1.6.1.min.js:17:31854) Error: Invalid character: Invalid character in tag name: ) at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) Error: Invalid character: Invalid character in tag name: ; at Object.createElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/level1/core.js:1190:13) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:128:35) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild 
(/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at setChild (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:171:7) at HtmlToDom.appendHtmlToElement (/Users/andi/node_modules/scraper/node_modules/jsdom/lib/jsdom/browser/htmltodom.js:77:9) ...
Admittedly, real-world HTML is often invalid, but clients expect a parser to tolerate syntax errors. The scraping and analysis seem to work, but the errors cannot be suppressed.
Source:
// Reproduction script: fetch google.de through the `scraper` module and
// print the text content of every <body> element.
var scraper;
scraper = require('scraper');
try {
  scraper({
    uri: 'http://google.de/',
    headers: {
      // Send a browser-like User-Agent with the request.
      'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
    }
  }, function(e, $) {
    // `e` is the error argument handed to the callback; `$` is the
    // jQuery-style selector function for the fetched page.
    if (e) {
      // The original code threw the undefined identifier `err`, which
      // raised a ReferenceError and hid the real failure. Throwing here is
      // also futile if this callback runs after scraper() has returned
      // (assumed for an HTTP fetch -- confirm against the scraper module),
      // because the surrounding try/catch is no longer active by then.
      // Report the failure in place instead.
      console.log('ERROR');
      return;
    }
    return $('body').each(function() {
      return console.log($(this).text() + '\n\n');
    });
  });
} catch (e) {
  // Only catches errors thrown synchronously by the scraper() call itself.
  console.log('ERROR');
}
I have the same problem. I'm using Node v0.4.12 on Ubuntu 11.10.