parserlib icon indicating copy to clipboard operation
parserlib copied to clipboard

Put the lexer's or parser's rules in a .cpp file

Open asmwarrior opened this issue 2 months ago • 2 comments

Hi, if I put all the grammar rules in the header file, then whenever I change a rule I may have to rebuild all the code that includes the grammar header, so I tried to find a way to put the rules in a .cpp file.

The my_grammar.hpp file:


#ifndef MY_GRAMMAR_HPP
#define MY_GRAMMAR_HPP

#include "parserlib.hpp"

using namespace parserlib;

// Lexer Grammar
/// Token-level grammar for arithmetic expressions.
///
/// This header carries only the identifiers and the declaration of `parse`;
/// the rules themselves are defined out of line in the .cpp file.
class my_lexer_grammar {
public:
    /// Identifiers attached to the tokens produced by this grammar.
    enum class match_id_type {
        NUMBER,       // built from digits in range('0', '9')
        PLUS,         // '+'
        MINUS,        // '-'
        MULTIPLY,     // '*'
        DIVIDE,       // '/'
        LEFT_PAREN,   // '('
        RIGHT_PAREN   // ')'
    };

    /// Identifiers for the errors this grammar reports.
    enum class error_id_type {
        INVALID_TOKEN
    };

    /// Runs the lexer rules against `pc` and returns their result.
    ///
    /// Only declared here. The definition lives in the .cpp file, so the call
    /// links only for ParseContext types explicitly instantiated there.
    template <typename ParseContext>
    parse_result parse(ParseContext& pc) const noexcept;
};

// Parser Grammar
/// Expression grammar that runs over the tokens of my_lexer_grammar.
///
/// As with the lexer, only declarations are exposed here; the rule set is the
/// private nested template `grammar`, which is defined in the .cpp file.
class my_parser_grammar {
public:
    /// Identifiers attached to the matches produced by this grammar.
    enum class match_id_type {
        NUM,   // a NUMBER token
        ADD,   // add PLUS mul
        SUB,   // add MINUS mul
        MUL,   // mul MULTIPLY val
        DIV    // mul DIVIDE val
    };

    /// Identifiers for the errors this grammar reports.
    enum class error_id_type {
        INVALID_EXPRESSION
    };

    /// Runs the parser rules against `pc` and returns their result.
    ///
    /// Only declared here. The definition lives in the .cpp file, so the call
    /// links only for ParseContext types explicitly instantiated there.
    template <typename ParseContext>
    parse_result parse(ParseContext& pc) const noexcept;

private:
    /// Holder of the actual rules; declared here, defined out of line.
    template <typename ParseContext>
    class grammar;
};

#endif

And the my_grammar.cpp file:

#include "my_grammar.hpp"

// ============ LEXER IMPLEMENTATION ============
/// Builds the lexer rules and runs them over `pc`.
///
/// All rule objects are function locals, so they are rebuilt on each call.
template <typename ParseContext>
parse_result my_lexer_grammar::parse(ParseContext& pc) const noexcept {
    // Character-level building blocks.
    const auto space = terminal(' ');
    const auto decimal_digit = range('0', '9');

    // One rule per token kind, each tagged with its match id.
    const auto number_tok = (+decimal_digit)->*match_id_type::NUMBER;
    const auto plus_tok = terminal('+')->*match_id_type::PLUS;
    const auto minus_tok = terminal('-')->*match_id_type::MINUS;
    const auto multiply_tok = terminal('*')->*match_id_type::MULTIPLY;
    const auto divide_tok = terminal('/')->*match_id_type::DIVIDE;
    const auto left_paren_tok = terminal('(')->*match_id_type::LEFT_PAREN;
    const auto right_paren_tok = terminal(')')->*match_id_type::RIGHT_PAREN;

    // The alternatives are listed in the same order as the enumerators.
    const auto any_token
        = number_tok
        | plus_tok
        | minus_tok
        | multiply_tok
        | divide_tok
        | left_paren_tok
        | right_paren_tok;

    // Alternative placed after the real tokens: an INVALID_TOKEN error wrapping
    // skip_until(space | any_token).
    const auto invalid_token = error(error_id_type::INVALID_TOKEN, skip_until(space | any_token));
    const auto token_or_error = any_token | invalid_token;

    // Top-level rule: a repetition of blanks and tokens (or token errors).
    const auto lexer_rules = *(space | token_or_error);

    return lexer_rules.parse(pc);
}

// ============ PARSER IMPLEMENTATION ============
/// Rule set of the expression parser, instantiated per parse-context type.
///
/// `mul` and `add` are rule members because they refer to themselves and to
/// each other; `val` only needs to exist while the constructor runs.
template <typename ParseContext>
class my_parser_grammar::grammar {
public:
    using match_id_type = my_parser_grammar::match_id_type;

    /// Wires the rules together.
    grammar() {
        // Shorthand for the token ids emitted by the lexer.
        using token = my_lexer_grammar::match_id_type;

        // val := LEFT_PAREN add RIGHT_PAREN | NUMBER
        const auto val
            = (token::LEFT_PAREN >> add >> token::RIGHT_PAREN)
            | (terminal(token::NUMBER)->*match_id_type::NUM);

        // mul := mul MULTIPLY val | mul DIVIDE val | val
        mul = (mul >> token::MULTIPLY >> val)->*match_id_type::MUL
            | (mul >> token::DIVIDE >> val)->*match_id_type::DIV
            | val;

        // add := add PLUS mul | add MINUS mul | mul
        add = (add >> token::PLUS >> mul)->*match_id_type::ADD
            | (add >> token::MINUS >> mul)->*match_id_type::SUB
            | mul;
    }

    /// Parses `pc` starting from the `add` rule.
    parse_result parse(ParseContext& pc) noexcept {
        return add.parse(pc);
    }

private:
    rule<ParseContext> mul;
    rule<ParseContext> add;
};

/// Constructs a fresh rule set for this ParseContext and runs it over `pc`.
template <typename ParseContext>
parse_result my_parser_grammar::parse(ParseContext& pc) const noexcept {
    grammar<ParseContext> rules;
    return rules.parse(pc);
}

// Explicit instantiation for lexer
template parse_result my_lexer_grammar::parse<
    parse_context<
        std::string,
        my_lexer_grammar::match_id_type,
        my_lexer_grammar::error_id_type,
        case_sensitive_comparator,
        empty_parse_context_extension
    >
>(parse_context<
    std::string,
    my_lexer_grammar::match_id_type,
    my_lexer_grammar::error_id_type,
    case_sensitive_comparator,
    empty_parse_context_extension
>&) const noexcept;

// Explicit instantiation for parser
template parse_result my_parser_grammar::parse<
    parse_context<
        std::vector<parsed_token<
            my_lexer_grammar::match_id_type,
            std::string::const_iterator
        >>,
        my_parser_grammar::match_id_type,
        my_parser_grammar::error_id_type,
        case_sensitive_comparator,
        empty_parse_context_extension
    >
>(parse_context<
    std::vector<parsed_token<
        my_lexer_grammar::match_id_type,
        std::string::const_iterator
    >>,
    my_parser_grammar::match_id_type,
    my_parser_grammar::error_id_type,
    case_sensitive_comparator,
    empty_parse_context_extension
>&) const noexcept;

Here is the client code, main.cpp, which runs the lexer and the parser:

#include <iostream>
#include <cassert>
#include "my_grammar.hpp"

using namespace parserlib;

int main() {
    // Test lexer only
    std::string source1 = "1 + 2 * 3";
    auto lexer_result = lexer<std::string, my_lexer_grammar>::parse(source1);

    std::cout << "Lexer test: " << (lexer_result.success ? "SUCCESS" : "FAILED") << "\n";
    std::cout << "Tokens found: " << lexer_result.parsed_tokens.size() << "\n\n";

    // Test full parser
    std::string source2 = "1 + 2 * 3";
    auto parser_result = parser<std::string, my_lexer_grammar, my_parser_grammar>::parse(source2);

    std::cout << "Parser test: " << (parser_result.success ? "SUCCESS" : "FAILED") << "\n";
    std::cout << "AST nodes: " << parser_result.ast_nodes.size() << "\n";

    if (!parser_result.ast_nodes.empty()) {
        std::cout << "Root node ID: " << static_cast<int>(parser_result.ast_nodes[0]->id()) << "\n";
    }

    return 0;
}

When I change the my_grammar.cpp file (for example, some of the rules in it), I don't need to recompile main.cpp.

I hope this can help others.

asmwarrior avatar Oct 31 '25 14:10 asmwarrior

Nice trick!

axilmar avatar Oct 31 '25 15:10 axilmar

The test code was generated by AI. You can see the DeepWiki site here:

https://deepwiki.com/axilmar/parserlib

I can chat with the AI and ask it to generate the code. I think the AI is very smart: it gives the detailed template arguments needed for the explicit instantiations in the .cpp file, so I don't get linker errors.

asmwarrior avatar Nov 01 '25 13:11 asmwarrior