bnfgen icon indicating copy to clipboard operation
bnfgen copied to clipboard

Need a help with this bnf for sql expressions

Open mingodad opened this issue 3 years ago • 2 comments

Hello !

I'm testing this tool with a subset of sqlite expressions and it hits maximum recursion too often could someone give some help to improve it ?

I noticed that for terminals it's best to add space inside the strings than to use --separator.

I'm testing it here https://baturin.org/tools/bnfgen/ with:

--max-reductions = 1000000
--max-nonproductive-reductions = 10000
<start> ::= "select ( " <expr> " ) as expr;" ;

<expr>	::=
	<term>
	| "(" <expr> ")"
	| <id>
	#| JOIN_KW
	#| <nm> "." <nm>
	#| <nm> "." <nm> "." <nm>
	#| VARIABLE
	| <expr> " collate " <ids>
	| " CAST" "(" <expr> " as " <typetoken> ")"
	| <id> "(" <distinct>{0,1} <exprlist> ")"
	| <id> "(" "*" ")"
	#| <id> "(" distinct{0,1} <exprlist> ")" filter_over
	#| <id> "(" "*" ")" filter_over
	| "(" <nexprlist> "," <expr> ")"
	| <expr> <binary_op> <expr>
	#| <expr> likeop <expr>
	#| <expr> likeop <expr> " ESCAPE " <expr>
	| <expr> <suffix_op>
	| <unary_op> <expr>
	| <expr> <between_op> <expr> " AND " <expr>
	| <expr> <in_op> "(" <exprlist> ")"
	| "(" <select> ")"
	| <expr> <in_op> "(" <select> ")"
	#| <expr> <in_op> nm dbnm paren_exprlist
	| " EXISTS" "(" <select> ")"
	| " CASE " <expr>{0,1} <case_exprlist> <case_else>{0,1} " END "
	#| " RAISE" "(" "IGNORE" ")"
	#| " RAISE" "(" raisetype "," nm ")"
	;

<exprlist>	::=
	<nexprlist>
	;

<nexprlist>	::=
	<nexprlist> "," <expr>
	| <expr>
	;

<paren_exprlist>	::=
	#/* empty */
	"(" <exprlist>{0,1} ")"
	;

<term>	::=
	" NULL "
	| <number>
	#| BLOB
	| <STRING>
	#| CTIME_KW
	;

<binary_op> ::=
	<logical_op>
	| <comparison_op>
	| <equality_op>
	| <bitwise_op>
	| <add_op>
	| <mul_op>
	| " || " #CONCAT
	| " IS "
	| " IS NOT "
	;

<comparison_op> ::=
	" < " | " > " | " >= " | " <= "
	;

<equality_op> ::=
	" = " | " <> "
	;

<bitwise_op> ::=
	" & " | " | " | " << " | " >> "
	;

<add_op> ::=
	" + " | " - "
	;

<mul_op> ::=
	" * " | " / " | " % "
	;

<logical_op> ::=
	" AND " | " OR "
	;

<unary_op> ::=
	" NOT "
	| " !"
	| " +"
	| " -"
	;

<suffix_op> ::=
	" NOT NULL "
	| " ISNULL "
	| " NOTNULL "
	;

<STRING> ::= "'str'";

<id> ::=
	"ab"
	| "cd"
	| "ef"
	| "gh"
	;

<nm> ::= <id>
	| <STRING>
	#| JOIN_KW
	;

<ids> ::=
	<id> | <STRING>
	;

<distinct>	::=
	" DISTINCT "
	| " ALL "
	;

<in_op> ::=
	" IN "
	| " NOT IN "
	;

<between_op> ::=
	" BETWEEN "
	| " NOT BETWEEN "
	;

<typetoken>	::=	#/* empty */
	<typename>
	| <typename> "(" <signed> ")"
	| <typename> "(" <signed> "," <signed> ")"
	;

<typename>	::=
	<ids>
	| <typename> <ids>
	;

<signed>	::=
	<plus_num>
	| <minus_num>
	;

<plus_num> ::=
	" +"{0,1} <number>
	;

<minus_num> ::=
	" -" <number>
	;

<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;

<integer> ::= <nzdigit> <digit>{1, 3};
<float> ::= <nzdigit>{1, 3} "." <digit>{1, 3};
<number> ::= <integer> | <float>;

<select> ::=
	" select 1 as one "
	;

<case_exprlist>	::=
	<case_exprlist> " WHEN " <expr> " THEN " <expr>
	| " WHEN " <expr> " THEN " <expr>
	;

<case_else>	::=
	" ELSE " <expr>
	;

Cheers !

mingodad avatar Aug 02 '21 09:08 mingodad

Here is a basic expression that gives good results most of the time:

<start> ::= <expr> ;

<expr> ::=
	<expr> <operations> <expr>
	| "(" <expr> ")"
	| <number>
	;

<operations> ::=
	"+" | "-" | "*" | "/" | "%"
	;

<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;

<integer> ::= <nzdigit> <digit>{1, 3};
<float> ::= <nzdigit>{1, 3} "." <digit>{1, 3};
<number> ::= <integer> | <float>;

mingodad avatar Aug 02 '21 10:08 mingodad

Here is a simple C program for the simple expr grammar to help understand one possible simple implementation of it:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
<start> ::= <expr> ;

<expr> ::=
	<expr> <operations> <expr>
	| "(" <expr> ")"
	| <number>
	;

<operations> ::=
	"+" | "-" | "*" | "/" | "%"
	;

<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;

<integer> ::= <nzdigit> <digit>{0, 3};
<float> ::= <integer> "." <integer>;
<number> ::= <integer> | <float>;
*/

static int xrandom(int low, int up) {
	double r = (double)rand() * (1.0 / ((double)RAND_MAX + 1.0));
	r *= (double)(up - low) + 1.0;
	return ((int)r + low);
}

static int max_xcount = 2048;
static int xcount = 0;
static void xoutput(const char *str) {
	while(*str) {
		putc(*str, stdout);
		++xcount;
		++str;
	}
}


void expr_nzdigit() {
	switch(xrandom(1,9)) {
		case 1: xoutput("1"); break;
		case 2: xoutput("2"); break;
		case 3: xoutput("3"); break;
		case 4: xoutput("4"); break;
		case 5: xoutput("5"); break;
		case 6: xoutput("6"); break;
		case 7: xoutput("7"); break;
		case 8: xoutput("8"); break;
		case 9: xoutput("9"); break;
	}
}

void expr_digit() {
	switch(xrandom(0,9)) {
		case 0: xoutput("0"); break;
		case 1: xoutput("1"); break;
		case 2: xoutput("2"); break;
		case 3: xoutput("3"); break;
		case 4: xoutput("4"); break;
		case 5: xoutput("5"); break;
		case 6: xoutput("6"); break;
		case 7: xoutput("7"); break;
		case 8: xoutput("8"); break;
		case 9: xoutput("9"); break;
	}
}

void expr_integer() {
	expr_nzdigit();
	int imax = xrandom(0,3);
	for(int i=0; i < imax; ++i) expr_digit();
}

void expr_float() {
	expr_integer();
	xoutput(".");
	expr_integer();
}

void expr_number() {
	switch(xrandom(0,1)) {
		case 0: expr_integer(); break;
		case 1: expr_float(); break;
	}
}

void expr_operations() {
	switch(xrandom(0,4)) {
		case 0: xoutput("+"); break;
		case 1: xoutput("-"); break;
		case 2: xoutput("*"); break;
		case 3: xoutput("/"); break;
		case 4: xoutput("%"); break;
	}
}

void expr_expr() {
	if(xcount > max_xcount) return;
	switch(xrandom(0,2)) {
		case 0: expr_expr(); expr_operations(); expr_expr(); break;
		case 1: xoutput("("); expr_expr(); xoutput(")"); break;
		case 2: expr_number(); break;
	}
}

void expr_start() {
	xcount = 0;
	srand(clock());
	expr_expr();
	xoutput("\n");
	if(xcount > max_xcount)
		printf("Output exceeds limit of %d\n", max_xcount);
}

/*
Accpets one integer parameter to limit the output
*/
int main(int argc, char *argv[]) {
	if(argc > 1) {
		max_xcount = atoi(argv[1]);
	}
	expr_start();
	return 0;
}

mingodad avatar Aug 03 '21 08:08 mingodad