node-htmlparser
node-htmlparser copied to clipboard
<source> tags are not parsed properly
I might be reading the output wrong, but this is the second time I've run into this. It seems that `<source>` isn't identified as a void tag, so consecutive `<source>` elements become children of one another when listed inside a `<video>`:
// Reproduction: parse a document whose <video> contains two <source> tags
// and print the resulting tree, indenting each nesting level by one space.
const htmlparser = require('htmlparser');

const htmlContent = "<html><head></head><body><video><source src=\"foo.ogv\"><source src=\"lol.smaz\"></video><div></div></body></html>";

// The `error` argument is accepted but not inspected by this script.
const handler = new htmlparser.DefaultHandler((error, dom) => {
  /**
   * Logs a node's name, then recurses into its children (if any).
   * @param {object} node - Parsed node; its `name` and `children` are read.
   * @param {string} indent - Prefix logged before the name; one space is added per level.
   */
  const printTree = (node, indent) => {
    console.log(indent, node.name);
    if (!node.children) {
      return;
    }
    for (const child of node.children) {
      printTree(child, indent + ' ');
    }
  };

  // Only the first top-level node is printed.
  printTree(dom[0], '');
});

const parser = new htmlparser.Parser(handler);
parser.parseComplete(htmlContent);
I came across this and thought I'd post my solution so someone else doesn't have to figure one out.
When I import `htmlparser`, I add the new void elements to the existing list like this:
const htmlparser = require('htmlparser');

// Tag names to register as void in addition to those already listed.
const extraVoidTags = { source: 1, track: 1, wbr: 1 };

// Replace the list with a fresh object: existing entries first, then the
// additions, so the additions win on any key collision.
htmlparser.DefaultHandler._emptyTags = Object.assign(
  {},
  htmlparser.DefaultHandler._emptyTags,
  extraVoidTags,
);