Putting the lexer's or parser's rules in a cpp file
Hi, if I put all the grammar rules in a header file, then whenever I change a rule I have to rebuild all the code that includes the grammar header. So I tried to find a way to put the rules in a cpp file.
The my_grammar.hpp file:
#ifndef MY_GRAMMAR_HPP
#define MY_GRAMMAR_HPP
#include "parserlib.hpp"
using namespace parserlib;
// Lexer Grammar
/// Lexer grammar: declares the token ids, the lexer error ids, and the
/// parse entry point. The body of parse() is not in this header; it is
/// defined and explicitly instantiated in my_grammar.cpp.
class my_lexer_grammar {
public:
    /// Ids attached to the tokens the lexer recognises.
    enum class match_id_type {
        NUMBER,
        PLUS,
        MINUS,
        MULTIPLY,
        DIVIDE,
        LEFT_PAREN,
        RIGHT_PAREN
    };

    /// Ids of the errors the lexer can report.
    enum class error_id_type {
        INVALID_TOKEN
    };

    /// Runs the lexer rules against the given parse context.
    /// Declaration only: callers must use a ParseContext type for which
    /// an explicit instantiation exists in my_grammar.cpp.
    template <typename ParseContext>
    parse_result parse(ParseContext& pc) const noexcept;
};
// Parser Grammar
/// Parser grammar: declares the AST match ids, the parser error ids, and
/// the parse entry point. The rules themselves live in the nested
/// `grammar` class template, which is only forward-declared here.
class my_parser_grammar {
public:
    /// Ids attached to the matches produced by the parser rules.
    enum class match_id_type {
        NUM,
        ADD,
        SUB,
        MUL,
        DIV
    };

    /// Ids of the errors the parser can report.
    enum class error_id_type {
        INVALID_EXPRESSION
    };

    /// Runs the parser rules against the given parse context.
    /// Declaration only: the definition is in my_grammar.cpp.
    template <typename ParseContext>
    parse_result parse(ParseContext& pc) const noexcept;

private:
    /// Holder of the actual rules; defined in my_grammar.cpp.
    template <typename ParseContext>
    class grammar;
};
#endif
And the my_grammar.cpp file:
#include "my_grammar.hpp"
// ============ LEXER IMPLEMENTATION ============
/// Builds the lexer rules and runs them against `pc`.
///
/// The input is a sequence of single spaces and tokens. Anything that is
/// not a token is reported as INVALID_TOKEN, skipping input until the next
/// space or token.
template <class ParseContext>
parse_result my_lexer_grammar::parse(ParseContext& pc) const noexcept {
    using id = match_id_type;

    // Separator between tokens (a single space character).
    const auto space = terminal(' ');

    // One or more decimal digits.
    const auto number_tok = (+range('0', '9'))->*id::NUMBER;

    // Single-character operators and parentheses.
    const auto plus_tok    = terminal('+')->*id::PLUS;
    const auto minus_tok   = terminal('-')->*id::MINUS;
    const auto mul_tok     = terminal('*')->*id::MULTIPLY;
    const auto div_tok     = terminal('/')->*id::DIVIDE;
    const auto lparen_tok  = terminal('(')->*id::LEFT_PAREN;
    const auto rparen_tok  = terminal(')')->*id::RIGHT_PAREN;

    // Any valid token; alternatives are tried in this order.
    const auto any_token
        = number_tok
        | plus_tok
        | minus_tok
        | mul_tok
        | div_tok
        | lparen_tok
        | rparen_tok;

    // Error alternative: used when no valid token matches.
    const auto bad_token = error(error_id_type::INVALID_TOKEN, skip_until(space | any_token));
    const auto token_or_error = any_token | bad_token;

    // Whole input: zero or more spaces or tokens.
    const auto all_input = *(space | token_or_error);
    return all_input.parse(pc);
}
// ============ PARSER IMPLEMENTATION ============
/// Holds the parser rules for a given parse context type.
///
/// The rules are members because `mul` and `add` refer to themselves and
/// to each other; they are wired up in the constructor.
template <typename ParseContext>
class my_parser_grammar::grammar {
public:
    using match_id_type = my_parser_grammar::match_id_type;

    /// Builds the expression rules over the lexer's token ids.
    grammar() {
        using lex_id = my_lexer_grammar::match_id_type;

        // operand := '(' add ')' | NUMBER
        const auto operand
            = lex_id::LEFT_PAREN >> add >> lex_id::RIGHT_PAREN
            | terminal(lex_id::NUMBER)->*match_id_type::NUM;

        // mul := mul '*' operand | mul '/' operand | operand
        mul = (mul >> lex_id::MULTIPLY >> operand)->*match_id_type::MUL
            | (mul >> lex_id::DIVIDE >> operand)->*match_id_type::DIV
            | operand;

        // add := add '+' mul | add '-' mul | mul
        add = (add >> lex_id::PLUS >> mul)->*match_id_type::ADD
            | (add >> lex_id::MINUS >> mul)->*match_id_type::SUB
            | mul;
    }

    /// Parses starting from the top-level `add` rule.
    parse_result parse(ParseContext& pc) noexcept {
        return add.parse(pc);
    }

private:
    rule<ParseContext> mul;  // multiplication / division level
    rule<ParseContext> add;  // addition / subtraction level (start rule)
};
/// Builds a fresh set of parser rules for this context type and runs them
/// against `pc`.
template <typename ParseContext>
parse_result my_parser_grammar::parse(ParseContext& pc) const noexcept {
    grammar<ParseContext> rules;
    return rules.parse(pc);
}
// Explicit instantiation of the lexer's parse() for the std::string-based
// parse context. ParseContext is deduced from the parameter type, so the
// context type is spelled out only once.
template parse_result my_lexer_grammar::parse(
    parse_context<
        std::string,
        my_lexer_grammar::match_id_type,
        my_lexer_grammar::error_id_type,
        case_sensitive_comparator,
        empty_parse_context_extension
    >&) const noexcept;
// Explicit instantiation of the parser's parse() for a parse context whose
// source is the vector of tokens produced by the lexer. ParseContext is
// deduced from the parameter type, so the context type is spelled out only
// once.
template parse_result my_parser_grammar::parse(
    parse_context<
        std::vector<parsed_token<
            my_lexer_grammar::match_id_type,
            std::string::const_iterator
        >>,
        my_parser_grammar::match_id_type,
        my_parser_grammar::error_id_type,
        case_sensitive_comparator,
        empty_parse_context_extension
    >&) const noexcept;
Here is the client code, main.cpp, which runs the lexer and the parser:
#include <iostream>
#include <cassert>
#include "my_grammar.hpp"
using namespace parserlib;
/// Demo driver: runs the lexer alone, then the lexer + parser, on a small
/// arithmetic expression and prints a summary of each result.
int main() {
    // --- Lexer only ---
    std::string lexer_input = "1 + 2 * 3";
    auto tokens_result = lexer<std::string, my_lexer_grammar>::parse(lexer_input);
    const char* lexer_status = tokens_result.success ? "SUCCESS" : "FAILED";
    std::cout << "Lexer test: " << lexer_status << "\n";
    std::cout << "Tokens found: " << tokens_result.parsed_tokens.size() << "\n\n";

    // --- Lexer + parser ---
    std::string parser_input = "1 + 2 * 3";
    auto ast_result = parser<std::string, my_lexer_grammar, my_parser_grammar>::parse(parser_input);
    const char* parser_status = ast_result.success ? "SUCCESS" : "FAILED";
    std::cout << "Parser test: " << parser_status << "\n";

    const auto& nodes = ast_result.ast_nodes;
    std::cout << "AST nodes: " << nodes.size() << "\n";
    if (!nodes.empty()) {
        // Print the id of the first AST node as an integer.
        std::cout << "Root node ID: " << static_cast<int>(nodes[0]->id()) << "\n";
    }

    return 0;
}
When I change the my_grammar.cpp file, for example some of the rules in it, I don't need to recompile main.cpp.
I hope this can help others.
Nice trick!
The test code is generated by AI. You can see the deepwiki site here:
https://deepwiki.com/axilmar/parserlib
I can chat with the AI and ask it to generate the code. I think the AI is very smart: it gives the detailed template arguments needed to explicitly instantiate the templates in the cpp file, so I don't get linker errors.