chumsky icon indicating copy to clipboard operation
chumsky copied to clipboard

Invalid error span for eoi

Open KrosFire opened this issue 1 year ago • 4 comments

I'm using my own lexer, which returns a vector of spanned tokens like this:

/// A lexed token, stored as its source text.
pub struct Token(pub String);
/// A value paired with its span.
pub type Spanned<T> = (T, Span);

/// Lexes `input` into a list of tokens, each tagged with its span.
fn tokenize(input: &str) -> Vec<Spanned<Token>>

For input like this:

// The whitespace between `2` and `as` yields no token, so offset 1..2 is not covered by any span.
let input = "2 as";
let tokens = tokenize(input); // [(Token("2"), 0..1), (Token("as"), 2..4)]

I've noticed an incorrect error span when my parser looks something like this:

/// Parses a single token as an identifier and returns its text.
///
/// The token text must start with an ASCII letter or `_`, continue with ASCII
/// alphanumerics or `_` only, and must not appear in `KEYWORDS`. `ident_name`
/// is used only to word the keyword error message.
///
/// Note: the `unwrap` on the first character panics if the token text is empty.
fn ident<'a>(ident_name: String) -> ... {
    any()
        .try_map(move |token: Token, span| {
            let word = token.to_string();
            let mut rest = word.chars();

            // Split off the leading character; the remainder stays in `rest`.
            let head = rest.next().unwrap();
            if !(head.is_ascii_alphabetic() || head == '_') {
                return Err(Rich::custom(
                    span,
                    "identifier must start with a letter or an underscore",
                ));
            }

            // Reject the word as soon as one trailing character is out of range.
            if rest.any(|c| !(c.is_ascii_alphanumeric() || c == '_')) {
                return Err(Rich::custom(
                    span,
                    "identifier must contain only alphanumeric characters or underscores",
                ));
            }

            // Well-formed identifiers may still collide with a reserved word.
            if KEYWORDS.contains(&word.as_str()) {
                return Err(Rich::custom(
                    span,
                    format!("keyword used as {ident_name} name"),
                ));
            }

            Ok(word)
        })
}

// Accepts any single token whose text consists solely of ASCII digits and yields that text.
let int = any().try_map(|token: Token, span: SimpleSpan| {
    let word = token.to_string();

    if word.chars().all(|c| c.is_ascii_digit()) {
        Ok(word)
    } else {
        Err(Rich::custom(span, "int must contain only digits"))
    }
});

// A number is an `int`, optionally followed by a `.` token and a second `int`.
let number = int
    .then(just(T!['.']).ignore_then(int).or_not())
    .map(|(int, float)| {
        // A missing fractional part is treated as "0", so `2` becomes "2.0".
        let float = float.unwrap_or('0'.to_string());

        format!("{}.{}", int, float)
    })
    // Parse the joined text as `f32`; `unwrapped` presumably panics if that
    // fails (confirm against the chumsky docs).
    .from_str::<f32>()
    .unwrapped()
    // The span is stored twice: inside `Number` and as the outer tuple element.
    .map_with(|num, e| (Expression::Number((num, e.span())), e.span()))
    .boxed();

// A cast expression: `number`, then the `as` token, then a type name.
let parser = number
    .then(
        just(T!["as"])
            .ignore_then(
                ident("type".to_string())
                    .map_with(|txt, e| (txt, e.span()))
                    // If the type name fails to parse, recover by accepting any one token
                    // or end of input, and substitute an "ERROR" placeholder carrying the
                    // span reported for the recovery parser.
                    .recover_with(via_parser(
                        any()
                            .or(end().map(|_| T![""]))
                            .map_with(|_, e| ("ERROR".to_string(), e.span())),
                    )),
            )
            .boxed(),
    )
    // Wrap both parts in `Cast`, tagged with the span reported at this outermost level.
    .map_with(|(exp, ty), e| (Expression::Cast(Box::new(exp), ty), e.span()))
    .boxed();

// `eoi` is not defined in this snippet; presumably it is the end offset of the last
// token (4 for the input above), TODO confirm. The zero-width span `eoi..eoi` is
// passed to `spanned` as the end-of-input span.
let spanned_tokens = tokens.spanned(SimpleSpan::new(eoi, eoi));

parser.parse(spanned_tokens)

Parsing results in:

output: Some((
Cast(
  (Number((2.0, 0..1)), 0..1),
  ("ERROR", 0..4),
), 0..4))
errors: "found end of input at 0..4 expected something else"

The error has a span of 0..4, but it should have a span of 4..4.

KrosFire avatar Nov 19 '24 15:11 KrosFire

Interesting, that seems odd. Any chance you could cut bits off until you have a minimum testable example that exhibits the behaviour?

zesterer avatar Nov 20 '24 09:11 zesterer

@zesterer Shorter example:

// Minimal reproduction: the `as` token followed by the `text` token, with recovery
// on the second token.
let parser = just(T!["as"])
    .map_with(
        |t, e| (t, e.span()),
    )
    .then(
        just(T!["text"])
            // Recovery consumes at most one token (nothing at end of input) and
            // yields an "error" token in its place.
            .recover_with(via_parser(any().or_not().map(|_| T!["error"])))
            // Applied after recovery, so a recovered token gets its span here too.
            .map_with(|t, e| (t, e.span())),
    )
    .map_with(|(t1, t2), e| (t1, t2, e.span()));

For input: "as"

tokens: [("as", 0..2)]

eoi: 2..2

Result is:

output: Some(("as", 0..2), ("error", 0..2), 0..2)
errs: found end of input at 0..2 expected "text"

KrosFire avatar Nov 20 '24 16:11 KrosFire

Do you have something I can compile and run locally? This seems to depend on a lot of external things.

zesterer avatar Nov 28 '24 09:11 zesterer

@zesterer This should run out of the box:

use chumsky::{
    input::{SpannedInput, ValueInput},
    prelude::{Parser as ChumskyParser, *},
};
use extra::Err;

/// A token produced by `lexer`: a run of non-whitespace characters from the input.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Token(pub String);

/// Shorthand for a cloneable chumsky parser over `&'src str` input that yields
/// `Output` and reports `Rich<'src, char>` errors.
trait Lexer<'src, Output>:
    ChumskyParser<'src, &'src str, Output, Err<Rich<'src, char>>> + Clone
{
}

// Blanket impl: every parser with the matching signature is a `Lexer`.
impl<'src, Output, T> Lexer<'src, Output> for T
where
    T: ChumskyParser<'src, &'src str, Output, Err<Rich<'src, char>>> + Clone,
{
}

/// Shorthand for a cloneable chumsky parser over a spanned slice of
/// `(Token, SimpleSpan)` pairs that yields `Output` and reports
/// `Rich<'src, Token>` errors.
trait Parser<'src, Output>:
    ChumskyParser<
        'src,
        SpannedInput<Token, SimpleSpan, &'src [(Token, SimpleSpan)]>,
        Output,
        Err<Rich<'src, Token>>,
    > + Clone
{
}

// Blanket impl: every parser with the matching signature is a `Parser`.
impl<'src, Output, T> Parser<'src, Output> for T
where
    T: ChumskyParser<
            'src,
            SpannedInput<Token, SimpleSpan, &'src [(Token, SimpleSpan)]>,
            Output,
            Err<Rich<'src, Token>>,
        > + Clone,
{
}

/// Splits the input into whitespace-separated tokens, each paired with its span.
///
/// Whitespace is only skipped *after* a token, so input that begins with
/// whitespace is not accepted by this lexer.
fn lexer<'a>() -> impl Lexer<'a, Vec<(Token, SimpleSpan)>> {
    any::<&'a str, Err<Rich<'a, char>>>()
        // A token is one or more consecutive non-whitespace characters.
        .filter(|c: &char| !c.is_whitespace())
        .repeated()
        .at_least(1)
        .collect::<String>()
        // The span is taken before trailing whitespace is skipped, so it covers
        // only the token's own characters.
        .map_with(|s, e| (Token(s), e.span()))
        // Discard the whitespace that follows the token (possibly none).
        .then_ignore(any().filter(|c: &char| c.is_whitespace()).repeated())
        .repeated()
        .collect::<Vec<(Token, SimpleSpan)>>()
        // Require that the whole input was consumed.
        .then_ignore(end())
}

/// Parses the token `as` followed by the token `text`, returning both tokens
/// with their spans.
///
/// If `text` does not match, recovery consumes at most one token and
/// substitutes `Token("error")` for it.
fn parser<'a>() -> impl Parser<'a, Vec<(Token, SimpleSpan)>> {
    just(Token("as".to_string()))
        .map_with(|t, e| (t, e.span()))
        .then(
            just(Token("text".to_string()))
                .recover_with(via_parser(
                    // `or_not` lets recovery succeed at end of input without consuming anything.
                    any().or_not().map(|_| Token("error".to_string())),
                ))
                // Runs after recovery, so a recovered token gets its span here too.
                .map_with(|t, e| (t, e.span())),
        )
        .map(|(t1, t2)| vec![t1, t2])
}

/// Lexes the fixed input `"as"`, parses the resulting tokens, and prints the parse result.
fn main() {
    let source = "as";
    let tokens = lexer().parse(source).unwrap();

    // End-of-input is a zero-width span at the end offset of the last token.
    let eoi = tokens.last().unwrap().1.end;
    let result = parser()
        .parse(tokens.spanned(SimpleSpan::new(eoi, eoi)))
        .into_result();

    println!("{:?}", result);
}

KrosFire avatar Nov 29 '24 19:11 KrosFire