Invalid error span for eoi
I'm using my own lexer that returns a vector of spanned tokens like this:
pub struct Token(pub String);
pub type Spanned<T> = (T, Span);
fn tokenize(input: &str) -> Vec<Spanned<Token>>
For input like this:
let input = "2 as";
let tokens = tokenize(input); // [(Token("2"), 0..1), (Token("as"), 2..4)]
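A minimal stand-in for the lexer (splitting on whitespace, and assuming Span is chumsky's SimpleSpan) that produces the tokens and spans shown above:
// Minimal stand-in lexer: split on whitespace, recording byte offsets as spans.
// Assumes `Span` is chumsky's `SimpleSpan`; a sketch, not the exact lexer used here.
fn tokenize(input: &str) -> Vec<Spanned<Token>> {
    let mut tokens = Vec::new();
    let mut start = None;
    for (i, c) in input.char_indices() {
        if c.is_whitespace() {
            // Whitespace ends the token currently being read, if any.
            if let Some(s) = start.take() {
                tokens.push((Token(input[s..i].to_string()), (s..i).into()));
            }
        } else if start.is_none() {
            start = Some(i);
        }
    }
    // Flush a trailing token that runs to the end of the input.
    if let Some(s) = start {
        tokens.push((Token(input[s..].to_string()), (s..input.len()).into()));
    }
    tokens
}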
I've noticed an incorrect error span when the parser looks something like this:
fn ident<'a>(ident_name: String) -> ... {
    any()
        .try_map(move |token: Token, span| {
            let word = token.to_string();
            let mut chars = word.chars();
            let first_char = chars.next().unwrap();
            if !first_char.is_ascii_alphabetic() && first_char != '_' {
                return Err(Rich::custom(
                    span,
                    "identifier must start with a letter or an underscore",
                ));
            }
            for char in chars {
                if !char.is_ascii_alphanumeric() && char != '_' {
                    return Err(Rich::custom(
                        span,
                        "identifier must contain only alphanumeric characters or underscores",
                    ));
                }
            }
            if KEYWORDS.contains(&word.as_str()) {
                return Err(Rich::custom(
                    span,
                    format!("keyword used as {ident_name} name"),
                ));
            }
            Ok(word)
        })
}
let int = any().try_map(|token: Token, span: SimpleSpan| {
    let word = token.to_string();
    for char in word.chars() {
        if !char.is_ascii_digit() {
            return Err(Rich::custom(span, "int must contain only digits"));
        }
    }
    Ok(word)
});
let number = int
    .then(just(T!['.']).ignore_then(int).or_not())
    .map(|(int, float)| {
        // A missing fractional part defaults to "0", so "2" becomes "2.0".
        let float = float.unwrap_or('0'.to_string());
        format!("{}.{}", int, float)
    })
    .from_str::<f32>()
    .unwrapped()
    .map_with(|num, e| (Expression::Number((num, e.span())), e.span()))
    .boxed();
let parser = number
    .then(
        just(T!["as"])
            .ignore_then(
                ident("type".to_string())
                    .map_with(|txt, e| (txt, e.span()))
                    .recover_with(via_parser(
                        any()
                            .or(end().map(|_| T![""]))
                            .map_with(|_, e| ("ERROR".to_string(), e.span())),
                    )),
            )
            .boxed(),
    )
    .map_with(|(exp, ty), e| (Expression::Cast(Box::new(exp), ty), e.span()))
    .boxed();
let spanned_tokens = tokens.spanned(SimpleSpan::new(eoi, eoi));
parser.parse(spanned_tokens)
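Here eoi is the end offset of the last token (4 for this input), computed along the lines of:
let eoi = tokens.last().map(|(_, span)| span.end).unwrap_or(0);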
Parsing results in:
output: Some((
    Cast(
        (Number((2.0, 0..1)), 0..1),
        ("ERROR", 0..4),
    ),
    0..4,
))
errors: "found end of input at 0..4 expected something else"
The error has a span of 0..4, but it should be 4..4.
Interesting, that seems odd. Any chance you could cut bits off until you have a minimal testable example that exhibits the behaviour?
@zesterer Shorter example:
let parser = just(T!["as"])
    .map_with(|t, e| (t, e.span()))
    .then(
        just(T!["text"])
            .recover_with(via_parser(any().or_not().map(|_| T!["error"])))
            .map_with(|t, e| (t, e.span())),
    )
    .map_with(|(t1, t2), e| (t1, t2, e.span()));
For input: "as"
tokens: [("as", 0..2)]
eoi: 2..2
Result is:
output: Some((("as", 0..2), ("error", 0..2), 0..2))
errs: found end of input at 0..2 expected "text"
Do you have something I can compile and run locally? This seems to depend on a lot of external things.
@zesterer This should run out of the box:
use chumsky::{
    input::{SpannedInput, ValueInput},
    prelude::{Parser as ChumskyParser, *},
};
use extra::Err;

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Token(pub String);

// Convenience alias for parsers over the raw source string.
trait Lexer<'src, Output>:
    ChumskyParser<'src, &'src str, Output, Err<Rich<'src, char>>> + Clone
{
}

impl<
    'src,
    Output,
    T: ChumskyParser<'src, &'src str, Output, extra::Err<Rich<'src, char>>> + Clone,
> Lexer<'src, Output> for T
{
}

// Convenience alias for parsers over the spanned token stream.
trait Parser<'src, Output>:
    ChumskyParser<
        'src,
        SpannedInput<Token, SimpleSpan, &'src [(Token, SimpleSpan)]>,
        Output,
        Err<Rich<'src, Token>>,
    > + Clone
{
}

impl<
    'src,
    Output,
    T: ChumskyParser<
            'src,
            SpannedInput<Token, SimpleSpan, &'src [(Token, SimpleSpan)]>,
            Output,
            extra::Err<Rich<'src, Token>>,
        > + Clone,
> Parser<'src, Output> for T
{
}

// Splits the input on whitespace, producing one spanned token per word.
fn lexer<'a>() -> impl Lexer<'a, Vec<(Token, SimpleSpan)>> {
    any::<&'a str, Err<Rich<'a, char>>>()
        .filter(|c: &char| !c.is_whitespace())
        .repeated()
        .at_least(1)
        .collect::<String>()
        .map_with(|s, e| (Token(s), e.span()))
        .then_ignore(any().filter(|c: &char| c.is_whitespace()).repeated())
        .repeated()
        .collect::<Vec<(Token, SimpleSpan)>>()
        .then_ignore(end())
}

// Expects `as` followed by `text`, recovering with an `error` token when
// `text` is missing.
fn parser<'a>() -> impl Parser<'a, Vec<(Token, SimpleSpan)>> {
    just(Token("as".to_string()))
        .map_with(|t, e| (t, e.span()))
        .then(
            just(Token("text".to_string()))
                .recover_with(via_parser(
                    any().or_not().map(|_| Token("error".to_string())),
                ))
                .map_with(|t, e| (t, e.span())),
        )
        .map(|(t1, t2)| vec![t1, t2])
}

fn main() {
    let input = "as";
    let tokens = lexer().parse(input).unwrap();
    // The end-of-input span starts and ends at the end of the last token.
    let end = tokens.last().unwrap().1.end;
    let spanned_input = tokens.spanned(SimpleSpan::new(end, end));
    let result = parser().parse(spanned_input).into_result();
    println!("{:?}", result);
}
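Running this should reproduce the shorter example above: both the recovered token and the error come out with span 0..2, while the end-of-input error would be expected at 2..2.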