bnfgen
bnfgen copied to clipboard
Need a help with this bnf for sql expressions
Hello !
I'm testing this tool with a subset of sqlite
expressions and it hits maximum recursion
too often could someone give some help to improve it ?
I noticed that for terminals it's best to add space inside the strings than to use --separator
.
I'm testing it here https://baturin.org/tools/bnfgen/ with:
--max-reductions = 1000000
--max-nonproductive-reductions = 10000
<start> ::= "select ( " <expr> " ) as expr;" ;
<expr> ::=
<term>
| "(" <expr> ")"
| <id>
#| JOIN_KW
#| <nm> "." <nm>
#| <nm> "." <nm> "." <nm>
#| VARIABLE
| <expr> " collate " <ids>
| " CAST" "(" <expr> " as " <typetoken> ")"
| <id> "(" <distinct>{0,1} <exprlist> ")"
| <id> "(" "*" ")"
#| <id> "(" distinct{0,1} <exprlist> ")" filter_over
#| <id> "(" "*" ")" filter_over
| "(" <nexprlist> "," <expr> ")"
| <expr> <binary_op> <expr>
#| <expr> likeop <expr>
#| <expr> likeop <expr> " ESCAPE " <expr>
| <expr> <suffix_op>
| <unary_op> <expr>
| <expr> <between_op> <expr> " AND " <expr>
| <expr> <in_op> "(" <exprlist> ")"
| "(" <select> ")"
| <expr> <in_op> "(" <select> ")"
#| <expr> <in_op> nm dbnm paren_exprlist
| " EXISTS" "(" <select> ")"
| " CASE " <expr>{0,1} <case_exprlist> <case_else>{0,1} " END "
#| " RAISE" "(" "IGNORE" ")"
#| " RAISE" "(" raisetype "," nm ")"
;
<exprlist> ::=
<nexprlist>
;
<nexprlist> ::=
<nexprlist> "," <expr>
| <expr>
;
<paren_exprlist> ::=
#/* empty */
"(" <exprlist>{0,1} ")"
;
<term> ::=
" NULL "
| <number>
#| BLOB
| <STRING>
#| CTIME_KW
;
<binary_op> ::=
<logical_op>
| <comparison_op>
| <equality_op>
| <bitwise_op>
| <add_op>
| <mul_op>
| " || " #CONCAT
| " IS "
| " IS NOT "
;
<comparison_op> ::=
" < " | " > " | " >= " | " <= "
;
<equality_op> ::=
" = " | " <> "
;
<bitwise_op> ::=
" & " | " | " | " << " | " >> "
;
<add_op> ::=
" + " | " - "
;
<mul_op> ::=
" * " | " / " | " % "
;
<logical_op> ::=
" AND " | " OR "
;
<unary_op> ::=
" NOT "
| " !"
| " +"
| " -"
;
<suffix_op> ::=
" NOT NULL "
| " ISNULL "
| " NOTNULL "
;
<STRING> ::= "'str'";
<id> ::=
"ab"
| "cd"
| "ef"
| "gh"
;
<nm> ::= <id>
| <STRING>
#| JOIN_KW
;
<ids> ::=
<id> | <STRING>
;
<distinct> ::=
" DISTINCT "
| " ALL "
;
<in_op> ::=
" IN "
| " NOT IN "
;
<between_op> ::=
" BETWEEN "
| " NOT BETWEEN "
;
<typetoken> ::= #/* empty */
<typename>
| <typename> "(" <signed> ")"
| <typename> "(" <signed> "," <signed> ")"
;
<typename> ::=
<ids>
| <typename> <ids>
;
<signed> ::=
<plus_num>
| <minus_num>
;
<plus_num> ::=
" +"{0,1} <number>
;
<minus_num> ::=
" -" <number>
;
<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;
<integer> ::= <nzdigit> <digit>{1, 3};
<float> ::= <nzdigit>{1, 3} "." <digit>{1, 3};
<number> ::= <integer> | <float>;
<select> ::=
" select 1 as one "
;
<case_exprlist> ::=
<case_exprlist> " WHEN " <expr> " THEN " <expr>
| " WHEN " <expr> " THEN " <expr>
;
<case_else> ::=
" ELSE " <expr>
;
Cheers !
Here is a basic expression that gives good results most of the time:
<start> ::= <expr> ;
<expr> ::=
<expr> <operations> <expr>
| "(" <expr> ")"
| <number>
;
<operations> ::=
"+" | "-" | "*" | "/" | "%"
;
<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;
<integer> ::= <nzdigit> <digit>{1, 3};
<float> ::= <nzdigit>{1, 3} "." <digit>{1, 3};
<number> ::= <integer> | <float>;
Here is a simple C program for the simple expr grammar to help understand one possible simple implementation of it:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
<start> ::= <expr> ;
<expr> ::=
<expr> <operations> <expr>
| "(" <expr> ")"
| <number>
;
<operations> ::=
"+" | "-" | "*" | "/" | "%"
;
<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;
<integer> ::= <nzdigit> <digit>{0, 3};
<float> ::= <integer> "." <integer>;
<number> ::= <integer> | <float>;
*/
static int xrandom(int low, int up) {
double r = (double)rand() * (1.0 / ((double)RAND_MAX + 1.0));
r *= (double)(up - low) + 1.0;
return ((int)r + low);
}
static int max_xcount = 2048;
static int xcount = 0;
static void xoutput(const char *str) {
while(*str) {
putc(*str, stdout);
++xcount;
++str;
}
}
void expr_nzdigit() {
switch(xrandom(1,9)) {
case 1: xoutput("1"); break;
case 2: xoutput("2"); break;
case 3: xoutput("3"); break;
case 4: xoutput("4"); break;
case 5: xoutput("5"); break;
case 6: xoutput("6"); break;
case 7: xoutput("7"); break;
case 8: xoutput("8"); break;
case 9: xoutput("9"); break;
}
}
void expr_digit() {
switch(xrandom(0,9)) {
case 0: xoutput("0"); break;
case 1: xoutput("1"); break;
case 2: xoutput("2"); break;
case 3: xoutput("3"); break;
case 4: xoutput("4"); break;
case 5: xoutput("5"); break;
case 6: xoutput("6"); break;
case 7: xoutput("7"); break;
case 8: xoutput("8"); break;
case 9: xoutput("9"); break;
}
}
void expr_integer() {
expr_nzdigit();
int imax = xrandom(0,3);
for(int i=0; i < imax; ++i) expr_digit();
}
void expr_float() {
expr_integer();
xoutput(".");
expr_integer();
}
void expr_number() {
switch(xrandom(0,1)) {
case 0: expr_integer(); break;
case 1: expr_float(); break;
}
}
void expr_operations() {
switch(xrandom(0,4)) {
case 0: xoutput("+"); break;
case 1: xoutput("-"); break;
case 2: xoutput("*"); break;
case 3: xoutput("/"); break;
case 4: xoutput("%"); break;
}
}
void expr_expr() {
if(xcount > max_xcount) return;
switch(xrandom(0,2)) {
case 0: expr_expr(); expr_operations(); expr_expr(); break;
case 1: xoutput("("); expr_expr(); xoutput(")"); break;
case 2: expr_number(); break;
}
}
void expr_start() {
xcount = 0;
srand(clock());
expr_expr();
xoutput("\n");
if(xcount > max_xcount)
printf("Output exceeds limit of %d\n", max_xcount);
}
/*
Accpets one integer parameter to limit the output
*/
int main(int argc, char *argv[]) {
if(argc > 1) {
max_xcount = atoi(argv[1]);
}
expr_start();
return 0;
}