f-strings produce `SIGSEGV`
Repro: https://github.com/rebcabin/lpython/tree/brian-lasr/lasr/lpython/Issue1946
The following works in ordinary CPython:
hexit : str = r'[0-9a-fA-F]'
octit : str = r'[0-7]'
digit : str = r'[0-9]'
alpha : str = r'[_a-zA-Z]'
asr_alpha_mer : str = r'[_a-zA-Z0-9@~]'
# Really want Pattern[str] and re.compile
hex_pat : str = fr'-?0[xX]{hexit}+'
oct_pat : str = fr'-?0{octit}+'
int_pat : str = fr'-?{digit}+'
dec_pat : str = fr'(-?{digit})M'
# Not kosher Clojure
sym_pat : str = fr'({alpha}{asr_alpha_mer}*)'
LPython produces a SIGSEGV (after commenting out the compatibility block at the top):
(lp) ┌─(~/CLionProjects/lpython/lasr/lpython)────────────────────────────────────────────────────────────────────────────────────────────────────────────(brian@Golf37:s000)─┐
└─(15:51:59 on brian-lasr ✹ ✭)──> ~/CLionProjects/lpython/src/bin/lpython Issue1946/lasr_lexer.py && ./lasr_lexer.out 2 ↵ ──(Sun,Jun18)─┘
Internal Compiler Error: Unhandled exception
Traceback (most recent call last):
File "/Users/brian/CLionProjects/lpython/src/bin/lpython.cpp", line 1828
std::string emit_file_name = basename + "__tmp__generated__.c";
File "/Users/brian/CLionProjects/lpython/src/bin/lpython.cpp", line 783
r1 = LCompilers::LPython::python_ast_to_asr(al, lm, *ast, diagnostics, compiler_options,
File "/Users/brian/CLionProjects/lpython/src/lpython/semantics/python_ast_to_asr.cpp", line 7429
diag::Diagnostics &diagnostics,
File "/Users/brian/CLionProjects/lpython/src/lpython/semantics/python_ast_to_asr.cpp", line 7382
target_type = ASRUtils::TYPE(ASR::make_Integer_t(al, x.base.base.loc, 8));
File "/Users/brian/CLionProjects/lpython/src/lpython/semantics/python_ast_to_asr.cpp", line 4492
void transform_stmts(Vec<ASR::stmt_t*> &body, size_t n_body, AST::stmt_t **m_body) {
File "/Users/brian/CLionProjects/lpython/src/lpython/python_ast.h", line 1883
void visit_stmt(const stmt_t &b) { visit_stmt_t(b, self()); }
File "/Users/brian/CLionProjects/lpython/src/lpython/python_ast.h", line 1757
case stmtType::AugAssign: { v.visit_AugAssign((const AugAssign_t &)x); return; }
File "/Users/brian/CLionProjects/lpython/src/lpython/semantics/python_ast_to_asr.cpp", line 4725
}
File "/Users/brian/CLionProjects/lpython/src/lpython/semantics/python_ast_to_asr.cpp", line 2707
}
File "/Users/brian/CLionProjects/lpython/src/lpython/python_ast.h", line 1910
void visit_expr(const expr_t &b) { visit_expr_t(b, self()); }
File "/Users/brian/CLionProjects/lpython/src/lpython/python_ast.h", line 1801
case exprType::FormattedValue: { v.visit_FormattedValue((const FormattedValue_t &)x); return; }
File "/Users/brian/CLionProjects/lpython/src/lpython/python_ast.h", line 1929
void visit_JoinedStr(const JoinedStr_t & /* x */) { throw LCompilersException("visit_JoinedStr() not implemented"); }
LCompilersException: visit_JoinedStr() not implemented
The workaround requires TWO things: manually interpolating the `{...}` references AND removing the `f` from the `fr` prefix. The following works in LPython:
hex_pat : str = r'-?0[xX][0-9a-fA-F]+'
oct_pat : str = r'-?0[0-7]+'
int_pat : str = r'-?[0-9]+'
dec_pat : str = r'(-?[0-9])M'
# Not kosher Clojure
sym_pat : str = r'([_a-zA-Z][_a-zA-Z0-9@~]*)'
Yes, we should definitely implement f-strings, at least the (large) subset that can be fully done at compile time (i.e., the formatting string is known at compile time, the variables are runtime, but we know their type), such as all your examples above.
At the very least, the error message must be better.