Parsing tradingview's pinescript
I wanted to try and parse a subset of tradingview's pinescript and create equivalent javascript code. I found antlr 3 lexer and parser definitions here: lexer parser.
These indicate 'ANTLR v3 syntax' so I combined these into a file called Tvscript.g file (below) and and ran
java -classpath E:\Users\Travis\Documents\Code\pinescript-parser\bin\antlr-3.4-complete.jar org.antlr.Tool Tvscript.g -Dlanguage=JavaScript
But I get a series of errors similar to below. I also tried with antlr-3.5.2-complete.jar but get different errors also shown below.
Can anyone help me understand what I am doing wrong. I've tried many things including putting the parser and lexer in different files and using parser grammar TvscriptParser and lexer grammar TvscriptLexer on the first lines. Also I've tried using various different versions of antlr from https://github.com/antlr/website-antlr3/tree/gh-pages/download. 3.4 and 3.5.2 seem to give the best results with this grammar as they seem to be the only versions to accept the -> operator.
antlr-3.4 errors
error(100): Tvscript.g:66:116: syntax error: antlr: NoViableAltException(68@[921:1: rewrite_alternative options {k=1; } : ({...}? => rewrite_template | {...}? => ( rewrite_element )+ -> {!stream_rewrite_element.hasNext()}? ^( ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"] ) -> ^( ALT[LT(1),"ALT"] ( rewrite_element )+ EOA["<end-of-alt>"] ) | -> ^( ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"] ) | {...}? ETC );])
error(100): Tvscript.g:66:118: syntax error: antlr: NoViableAltException(75@[])
error(100): Tvscript.g:66:118: syntax error: antlr: MissingTokenException(inserted [@-1,0:0='<missing SEMI>',<52>,66:117] at global_stmt_content)
error(100): Tvscript.g:66:138: syntax error: antlr: MismatchedTokenException(69!=54)
antlr-3.5.2 errors
error(100): Tvscript.g:66:116: syntax error: antlr: NoViableAltException(51@[])
error(100): Tvscript.g:66:118: syntax error: antlr: NoViableAltException(80@[])
error(100): Tvscript.g:66:118: syntax error: antlr: MissingTokenException(inserted [@-1,0:0='<missing SEMI>',<82>,66:117] at global_stmt_content)
grammar Tvscript;
COND : '?' ;
COND_ELSE : ':' ;
OR : 'or' ;
AND : 'and' ;
NOT : 'not' ;
EQ : '==' ;
NEQ : '!=' ;
GT : '>' ;
GE : '>=' ;
LT : '<' ;
LE : '<=' ;
PLUS : '+' ;
MINUS : '-' ;
MUL : '*' ;
DIV : '/' ;
MOD : '%' ;
COMMA : ',' ;
ARROW : '=>' ;
LPAR : '(' ;
RPAR : ')' ;
LSQBR : '[' ;
RSQBR : ']' ;
DEFINE : '=' ;
IF_COND : 'if' ;
IF_COND_ELSE : 'else' ;
BEGIN : '|BEGIN|' ;
END : '|END|' ;
ASSIGN : ':=' ;
FOR_STMT : 'for' ;
FOR_STMT_TO : 'to' ;
FOR_STMT_BY : 'by' ;
BREAK : 'break' ;
CONTINUE : 'continue' ;
LBEG : '|B|' ;
LEND : '|E|' ;
PLEND : '|PE|' ;
INT_LITERAL : ( '0' .. '9' )+ ;
FLOAT_LITERAL : ( '.' DIGITS ( EXP )? | DIGITS ( '.' ( DIGITS ( EXP )? )? | EXP ) );
STR_LITERAL : ( '"' ( ESC | ~ ( '\\' | '\n' | '"' ) )* '"' | '\'' ( ESC | ~ ( '\\' | '\n' | '\'' ) )* '\'' );
BOOL_LITERAL : ( 'true' | 'false' );
COLOR_LITERAL : ( '#' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT | '#' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT );
ID : ( ID_LETTER ) ( ( '\.' )? ( ID_BODY '\.' )* ID_BODY )? ;
ID_EX : ( ID_LETTER_EX ) ( ( '\.' )? ( ID_BODY_EX '\.' )* ID_BODY_EX )? ;
INDENT : '|INDENT|' ;
LINE_CONTINUATION : '|C|' ;
EMPTY_LINE_V1 : '|EMPTY_V1|' ;
EMPTY_LINE : '|EMPTY|' ;
WHITESPACE : ( ' ' | '\t' | '\n' )+ ;
fragment ID_BODY : ( ID_LETTER | DIGIT )+ ;
fragment ID_BODY_EX : ( ID_LETTER_EX | DIGIT )+ ;
fragment ID_LETTER : ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) ;
fragment ID_LETTER_EX : ( 'a' .. 'z' | 'A' .. 'Z' | '_' | '#' ) ;
fragment DIGIT : ( '0' .. '9' ) ;
fragment ESC : '\\' . ;
fragment DIGITS : ( '0' .. '9' )+ ;
fragment HEX_DIGIT : ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' ) ;
fragment EXP : ( 'e' | 'E' ) ( '+' | '-' )? DIGITS ;
Tokens : ( COND | COND_ELSE | OR | AND | NOT | EQ | NEQ | GT | GE | LT | LE | PLUS | MINUS | MUL | DIV | MOD | COMMA | ARROW | LPAR | RPAR | LSQBR | RSQBR | DEFINE | IF_COND | IF_COND_ELSE | BEGIN | END | ASSIGN | FOR_STMT | FOR_STMT_TO | FOR_STMT_BY | BREAK | CONTINUE | LBEG | LEND | PLEND | INT_LITERAL | FLOAT_LITERAL | STR_LITERAL | BOOL_LITERAL | COLOR_LITERAL | ID | ID_EX | INDENT | LINE_CONTINUATION | EMPTY_LINE_V1 | EMPTY_LINE | WHITESPACE );
tvscript : ( stmt )+ ;
stmt : ( fun_def_stmt | global_stmt_or_multistmt );
global_stmt_or_multistmt : ( BEGIN global_stmt_or_multistmt END | global_stmt_or_multistmt2 | EMPTY_LINE ->);
global_stmt_or_multistmt2 : LBEG global_stmt_content ( COMMA global_stmt_content )* ( COMMA )? ( LEND | PLEND ) -> ( global_stmt_content )+ ;
global_stmt_content : ( var_def | var_defs | fun_call | if_expr | var_assign | for_expr | loop_break | loop_continue );
fun_def_stmt : ( LBEG fun_def_singleline LEND -> fun_def_singleline | LBEG fun_def_multiline PLEND -> fun_def_multiline );
fun_def_singleline : id fun_head ARROW fun_body_singleline -> ^( FUN_DEF id ^( FUN_DEF_EXPR fun_head fun_body_singleline ) ) ;
fun_def_multiline : id fun_head ARROW ( LEND )? fun_body_multiline -> ^( FUN_DEF id ^( FUN_DEF_EXPR fun_head fun_body_multiline ) ) ;
fun_head : LPAR ( id ( COMMA id )* )? RPAR -> ^( FUN_HEAD ( id )* ) ;
fun_body_singleline : stmts= local_stmt_singleline -> ^( FUN_BODY ) ;
local_stmt_singleline : ( BEGIN local_stmt_singleline END | local_stmt_singleline2 );
local_stmt_singleline2 : local_stmt_content ( COMMA local_stmt_content )* ( COMMA )? -> ( local_stmt_content )+ ;
local_stmt_content returns [exprTree] : ( var_def | var_defs | arith_expr | arith_exprs | var_assign | loop_break | loop_continue );
loop_break : BREAK ;
loop_continue : CONTINUE ;
fun_body_multiline : stmts= local_stmts_multiline -> ^( FUN_BODY ) ;
local_stmts_multiline returns [myTree, lastStmtTree] : ( EMPTY_LINE )* BEGIN local_stmts_multiline2 END ;
local_stmts_multiline2 returns [lastStmtTree] : ( local_stmt_multiline )+ ;
local_stmt_multiline returns [lastStmtTree] : ( LBEG s1= local_stmt_content ( COMMA s2= local_stmt_content )* ( COMMA )? ( LEND | PLEND ) -> ( local_stmt_content )+ | EMPTY_LINE ->);
var_def returns [exprTree] : id DEFINE arith_expr -> ^( VAR_DEF id arith_expr ) ;
var_defs returns [exprTree] : ids_array DEFINE arith_expr ->;
var_assign returns [exprTree] : id ASSIGN arith_expr -> ^( VAR_ASSIGN id arith_expr ) ;
ids_array returns [ids] : LSQBR a= id ( COMMA b= id )* RSQBR ->;
arith_exprs : LSQBR a= arith_expr ( COMMA b= arith_expr )* RSQBR -> ^( IDS ) ;
arith_expr : ( ternary_expr | if_expr | for_expr );
if_expr : ( ( IF_COND ternary_expr LEND stmts_block PLEND LBEG IF_COND_ELSE )=> IF_COND ternary_expr LEND x= stmts_block PLEND LBEG IF_COND_ELSE LEND y= stmts_block -> ^( IF_THEN_ELSE ternary_expr THEN ELSE ) | IF_COND ternary_expr LEND x= stmts_block -> ^( IF_THEN ternary_expr THEN ) );
for_expr : ( ( FOR_STMT var_def FOR_STMT_TO ternary_expr FOR_STMT_BY )=> FOR_STMT var_def FOR_STMT_TO end= ternary_expr FOR_STMT_BY step= ternary_expr LEND stmts_block -> ^( FOR var_def stmts_block ) | FOR_STMT var_def FOR_STMT_TO ternary_expr LEND stmts_block -> ^( FOR var_def ternary_expr stmts_block ) );
stmts_block : fun_body_multiline ;
ternary_expr : or_expr ( COND ternary_expr2 )? ;
ternary_expr2 : ternary_expr COND_ELSE ternary_expr ;
or_expr : and_expr ( OR and_expr )* ;
and_expr : eq_expr ( AND eq_expr )* ;
eq_expr : cmp_expr ( ( EQ | NEQ ) cmp_expr )* ;
cmp_expr : add_expr ( ( GT | GE | LT | LE ) add_expr )* ;
add_expr : mult_expr ( ( PLUS | MINUS ) mult_expr )* ;
mult_expr : unary_expr ( ( MUL | DIV | MOD ) unary_expr )* ;
unary_expr : ( sqbr_expr | NOT sqbr_expr -> ^( NOT sqbr_expr ) | PLUS sqbr_expr -> ^( UNARY_PLUS sqbr_expr ) | MINUS sqbr_expr -> ^( UNARY_MINUS sqbr_expr ) );
sqbr_expr : atom ( LSQBR arith_expr RSQBR )? -> { squareBracketsPresent }? ^( SQBR atom arith_expr ) -> atom ;
atom : ( fun_call | id | literal | LPAR arith_expr RPAR -> arith_expr );
fun_call : id LPAR ( fun_actual_args )? RPAR -> ^( FUN_CALL id ( fun_actual_args )? ) ;
fun_actual_args : ( kw_args -> ^( FUN_ARGS kw_args ) | pos_args ( COMMA kw_args )? -> ^( FUN_ARGS pos_args ( kw_args )? ) );
pos_args : arith_expr ( COMMA arith_expr )* -> ( arith_expr )+ ;
kw_args : kw_arg ( COMMA kw_arg )* -> ( kw_arg )+ ;
kw_arg : id DEFINE arith_expr -> ^( KW_ARG id arith_expr ) ;
literal : ( num_literal | other_literal );
num_literal : ( INT_LITERAL | FLOAT_LITERAL );
other_literal : ( STR_LITERAL | BOOL_LITERAL | COLOR_LITERAL );
id returns [text] : ID ->;
synpred1_TVScriptParserMain_v2 : IF_COND ternary_expr LEND stmts_block PLEND LBEG IF_COND_ELSE ;
synpred2_TVScriptParserMain_v2 : FOR_STMT var_def FOR_STMT_TO ternary_expr FOR_STMT_BY ;
Is there any progress on this topic ?
+1
Yes. Any progress? I use this for real work
i have a semi-solution... does EBNF work for you?
don't know if tthis will work, but maybe it's a step towards what you need?
didn't clean out th |xxx| stuff, if you sort those tokens out, please share back if you get it working..
grammar pv4;
Tokens: (
'?'
| ':'
| 'or'
| 'and'
| 'not'
| '=='
| '!='
| '>'
| '>='
| '<'
| '<='
| '+'
| '-'
| '*'
| '/'
| '%'
| ','
| '=>'
| '('
| ')'
| '['
| ']'
| '='
| 'if'
| 'else'
| EOF
| ':='
| 'for'
| 'to'
| 'by'
| 'break'
| 'continue'
| '|B|'
| '|E|'
| '|PE|'
| INT_LITERAL
| FLOAT_LITERAL
| STR_LITERAL
| BOOL_LITERAL
| COLOR_LITERAL
| ID
| ID_EX
| '|INDENT|'
| '|C|'
| '|EMPTY_V1|'
| WHITESPACE
);
fragment SPACES: [\t] + ;
NEWLINE: (( '\r'? '\n' | '\r') SPACES?);
stmt: ( fun_def_stmt | global_stmt_or_multistmt);
global_stmt_or_multistmt: (
global_stmt_or_multistmt2 | NEWLINE
);
global_stmt_or_multistmt2: (
'|B|' global_stmt_content (',' global_stmt_content)* ','? (
'|E|'
| '|PE|'
)
);
global_stmt_content: (
var_def
| var_defs
| fun_call
| if_expr
| var_assign
| for_expr
| 'break'
| 'continue'
);
fun_def_stmt: (
'|B|' (
fun_def_singleline '|E|'
| fun_def_multiline '|PE|'
)
);
fun_def_singleline: ( id fun_head '=>' fun_body_singleline);
fun_def_multiline: ( id fun_head '=>' '|E|'? fun_body_multiline);
fun_head: ( '(' ( id ( ',' id)*)? ')');
fun_body_singleline: ( local_stmt_singleline);
local_stmt_singleline: (
NEWLINE local_stmt_singleline EOF
| local_stmt_singleline2
);
local_stmt_singleline2: (
local_stmt_content (',' local_stmt_content)* ','?
);
local_stmt_content: (
var_def
| var_defs
| arith_expr
| arith_exprs
| var_assign
| 'break'
| 'continue'
);
fun_body_multiline: ( local_stmts_multiline);
local_stmts_multiline: (
NEWLINE local_stmts_multiline2 EOF
);
local_stmts_multiline2: ( local_stmt_multiline+);
local_stmt_multiline: (
'|B|' local_stmt_content (',' local_stmt_content)* ','? (
'|E|'
| '|PE|'
)
);
var_def: ( id '=' arith_expr);
var_defs: ( ids_array '=' arith_expr);
var_assign: ( id ':=' arith_expr);
ids_array: ( '[' id ( ',' id)* ']');
arith_exprs: ( '[' arith_expr ( ',' arith_expr)* ']');
arith_expr: ( ternary_expr | if_expr | for_expr);
if_expr: (
'if' ternary_expr '|E|' stmts_block (
'|PE|' '|B|' 'else' '|E|' stmts_block
)?
);
for_expr: (
'for' var_def 'to' ternary_expr ('by' ternary_expr)? '|E|' stmts_block
);
stmts_block: ( fun_body_multiline);
ternary_expr: ( or_expr ( '?' ternary_expr2)?);
ternary_expr2: ( ternary_expr ':' ternary_expr);
or_expr: ( and_expr ( 'or' and_expr)*);
and_expr: ( eq_expr ( 'and' eq_expr)*);
eq_expr: ( cmp_expr ( ( '==' | '!=') cmp_expr)*);
cmp_expr: ( add_expr ( ( '>' | '>=' | '<' | '<=') add_expr)*);
add_expr: ( mult_expr ( ( '+' | '-') mult_expr)*);
mult_expr: ( unary_expr ( ( '*' | '/' | '%') unary_expr)*);
unary_expr: ( ( 'not' | '+' | '-')? sqbr_expr);
sqbr_expr: ( atom ( '[' arith_expr ']')?);
atom: ( fun_call | id | literal | '(' arith_expr ')');
fun_call: ( id '(' fun_actual_args? ')');
fun_actual_args: ( arguments | pos_args ( ',' arguments)?);
pos_args: ( arith_expr ( ',' arith_expr)*);
arguments: ( kw_arg ( ',' kw_arg)*);
kw_arg: ( id '=' arith_expr);
literal: ( num_literal | other_literal);
num_literal: ( INT_LITERAL | FLOAT_LITERAL);
other_literal: ( STR_LITERAL | BOOL_LITERAL | COLOR_LITERAL);
id: ID;
INT_LITERAL: ( [0-9]+);
FLOAT_LITERAL: (
'.' DIGITS EXP?
| DIGITS ( '.' ( DIGITS EXP?)? | EXP)
);
STR_LITERAL: ( ["'] | ESC);
BOOL_LITERAL: ( 'true' | 'false');
COLOR_LITERAL: (
'#' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT (
HEX_DIGIT HEX_DIGIT
)?
);
ID: ( ID_LETTER ( '.'? ID_BODY ( '.' ID_BODY)*)?);
fragment ID_EX: (
ID_LETTER_EX ('.'? ID_BODY_EX ( '.' ID_BODY_EX)*)?
);
fragment WHITESPACE: ( [ ]+);
fragment ID_BODY: ( ( ID_LETTER | DIGIT)+);
fragment ID_BODY_EX: ( ( ID_LETTER_EX | DIGIT)+);
fragment ID_LETTER: ( [a-zA-Z_]);
fragment ID_LETTER_EX: ( [a-zA-Z_]);
fragment DIGIT: ( [0-9]);
fragment ESC: ('\\' | .);
fragment DIGITS: ( [0-9]+);
fragment HEX_DIGIT: ( [0-9a-fA-F]);
fragment EXP: ( [Ee] ('+' | '-')? DIGITS);
```