tl
tl copied to clipboard
Comment ending in `--->` causes parser to skip following nodes
I encountered an issue where parsing breaks after a comment that ends in `--->` instead of the usual `-->`:
use tl::{parse, ParserOptions};
/// Minimal reproduction: parses three lines of markup whose middle line is a
/// comment closed by `--->`, then looks up each `<div>` by its class name.
fn main() {
    // The comment on the second line has three dashes before the closing `>`.
    let markup = r#"
<div class="ok"></div>
<!-- Comment with extra hyphen >--->
<div class="unreachable"></div>
"#;
    let options = ParserOptions::default();
    let dom = parse(markup, options).unwrap();

    // Element that appears before the comment.
    let mut ok_matches = dom.get_elements_by_class_name("ok");
    ok_matches.next().expect(".ok node");

    // Element that appears after the comment.
    let mut unreachable_matches = dom.get_elements_by_class_name("unreachable");
    unreachable_matches.next().expect(".unreachable node");
}
Printing the DOM:
VDom {
parser: Parser {
stream: Stream { ... },
stack: [],
options: ParserOptions {
flags: 0,
},
tags: [
Raw(
Bytes(
"\n ",
),
),
Tag(
HTMLTag {
_name: Bytes(
"div",
),
_attributes: Attributes {
raw: InlineHashMap(
InlineHashMap<0 items>,
),
id: None,
class: Some(
Bytes(
"ok",
),
),
},
_children: InlineVec(
InlineVec<0 items>,
),
_raw: Bytes(
"<div class=\"ok\"></div>",
),
},
),
Raw(
Bytes(
"\n ",
),
),
Comment(
Bytes(
"",
),
),
],
ast: [
NodeHandle(
0,
),
NodeHandle(
1,
),
NodeHandle(
2,
),
NodeHandle(
3,
),
],
ids: {},
classes: {},
version: None,
},
}
The last node is the comment, and it is parsed as if it were empty; the `unreachable` div that follows it does not appear at all. Removing the extra dash produces the expected result.
I noticed something in the Parser impl:
if self
.stream
.slice_len(idx, constants::COMMENT.len())
.eq(constants::COMMENT)
{
self.stream.advance_by(constants::COMMENT.len());
let is_end_of_comment = self.stream.expect_and_skip_cond(b'>');
if is_end_of_comment {
return self.stream.slice(start, self.stream.idx);
}
}
If I'm reading this right, this checks for `--` and skips it, then ends the loop if the next character is `>`. My unusual example breaks this assumption: the next character sequence after skipping is `->`, so the loop continues until EOF. An even number of dashes would work, but an odd number wouldn't. Maybe check for a single dash followed by `>` instead of `--`?