nearley-reverse
nearley-reverse copied to clipboard
Unable to reverse
Hey
I'm trying to use nearley-reverse to regenerate the original text after parsing, using the same grammar definition, but I get a really weird result.
# mt101.nearley.ne
@{%
/**
 * Pair a postprocessor with its inverse.
 *
 * @param {Function} decode - postprocessor; it is returned with `encode` attached.
 * @param {Function} encode - function stored on `decode` as its `encode` property.
 * @returns {Function} the same `decode` function that was passed in.
 */
function factory(decode, encode) {
  return Object.assign(decode, { encode })
}
// Attach an encoder to `id` that wraps its argument in a one-element list.
// NOTE(review): `id` is not defined in this snippet — presumably nearley's built-in postprocessor; confirm.
id.encode = x => [x]
%}
# A generic record: ":" recordId ":" recordContent
genericRecord -> ":" recordId ":" recordContent
{% factory(
// decode: keep the record id and the content, drop the two ":" separators
([_1, recordId, _2, content]) => {
return {
recordId,
content,
}
},
// encode: destructures its argument as a 4-element array and returns the four pieces as a list
// NOTE(review): decode above returns an object, not an array — confirm which shape this encoder actually receives
([ _1, recordId, _2, content ]) => {
return [":", recordId, ":", content]
}
) %}
# Record id: one or more characters from [0-9A-Z]
recordId -> [0-9A-Z]:+
{% factory(
// decode: join the matched characters into a single string
([d]) => d.join(''),
// encode: split the string back into characters (a falsy value is treated as '')
rid => (rid || '').split(''),
) %}
# Record content: one or more characters other than ":"
recordContent -> [^:]:+
{% factory(
// decode: join the matched characters into a single string
([d]) => d.join(''),
// encode: split the string back into characters (a falsy value is treated as '')
c => (c || '').split(''),
) %}
// test.js
const nearley = require('nearley')
const reverse = require('nearley-reverse')
const compiledGrammar = require('./mt101.nearley')

// Sample record fed to the parser.
const mt101Text = `:20:1106210100000003`

// Build a parser from the compiled grammar and parse the sample.
const grammar = nearley.Grammar.fromCompiled(compiledGrammar)
const parser = new nearley.Parser(grammar)
parser.feed(mt101Text)

// Print the first parse result as JSON.
const [result] = parser.results
console.log('json\n', JSON.stringify(result, null, 2))
console.log()

// Run the grammar in reverse over the four pieces and print the joined text.
const pieces = [':', '20', ':', '1106210100000003']
const original = reverse(grammar, pieces).join('')
console.log('original\n', original)
# output
json
{
"recordId": "20",
"content": "1106210100000003"
}
original
:/[0-9A-Z]/:/[^:]/
What am I doing wrong?
Your encoders aren't quite right. They should take a piece of parse tree and return a list, if I remember correctly:
// Encoder: takes the parsed record object and returns its pieces as a list.
x => {
return [":", x.recordId, ":", x.content]
}
Finally, I don't know if nearley-reverse supports character groups like [0-9A-Z]; but it definitely doesn't like EBNF modifiers like :+.
I hope that helps! 🙂
Hi @tjvr, got it. Is there a way to handle EBNF modifiers? Do I have to use a tokenizer for that?
Any thoughts @tjvr ?
I think nearley-reverse is supposed to be used with a tokenizer, yes.
If you really need EBNF modifiers, you can always write them yourself, e.g. A -> B:+
becomes
A -> B
| A B
I just replaced one field that was previously using the EBNF modifier :+ with a custom rule:
# recordId written without the :+ modifier: one recordIdChar, or a recordId followed by one more recordIdChar
recordIdChar -> [0-9A-Z]
recordId -> recordIdChar
| recordId recordIdChar
And I still get the same result.