Simple template only properly parses the last `if` block
Hi all, apologies if this is the wrong place, but I'm struggling to understand exactly where the demarcation of this problem lies.
I'm building an Emacs mode using tree-sitter-embedded-template for ERB and I've hit an annoying snag.
Mode code below if anyone wants to help reproduce, but you'll need a recent version of Emacs to get html-ts-mode:
(require 'treesit)
(require 'ruby-ts-mode)
(require 'html-ts-mode)
(require 'sgml-mode)
;; Range rules for `erb-ts-mode', built with `treesit-range-rules'.
;; Both rules use the `embedded-template' parser as the host: its `code'
;; nodes are embedded as Ruby and its `content' nodes as HTML.
(defvar erb-ts-mode--range-settings
(treesit-range-rules
;; Ruby: every `code' node captured in the embedded-template tree.
:embed 'ruby
:host 'embedded-template
'((code) @capture)
;; HTML: every `content' node captured in the embedded-template tree.
:embed 'html
:host 'embedded-template
'((content) @capture)))
(defun erb-ts-setup ()
  "Prepare the current buffer for `erb-ts-mode'.
Set the buffer-local values of `electric-pair-pairs' and
`treesit-range-settings', then call `treesit-major-mode-setup'."
  ;; A single `setq-local' assigns both variables, in this order.
  (setq-local electric-pair-pairs '((?\< . ?\>)
                                    (?\% . ?\%)
                                    (?\{ . ?\})
                                    (?\( . ?\))
                                    (?\[ . ?\])
                                    (?\' . ?\')
                                    (?\" . ?\"))
              treesit-range-settings erb-ts-mode--range-settings)
  (treesit-major-mode-setup))
;;;###autoload
(define-derived-mode erb-ts-mode html-mode "ERB[ts]"
  "Major mode for editing ERB with tree-sitter.

Signal an error unless the tree-sitter grammars for `ruby', `html'
and `embedded-template' are all available.  Otherwise create the
`embedded-template' parser, make it the primary parser and run
`erb-ts-setup'."
  :syntax-table html-mode-syntax-table
  (unless (treesit-ready-p 'ruby)
    (error "Tree-sitter grammar for Ruby isn't available"))
  (unless (treesit-ready-p 'html)
    (error "Tree-sitter grammar for HTML isn't available"))
  (unless (treesit-ready-p 'embedded-template)
    (error "Tree-sitter grammar for ERB isn't available"))
  ;; Every grammar has been verified above (each failed check signals an
  ;; error), so no further `treesit-ready-p' guard is needed here.
  (setq-local treesit-primary-parser (treesit-parser-create 'embedded-template))
  (erb-ts-setup))
<% if check.none? %>
<%= t('.no_check') %>
<% end %>
<% if another_check.any? %>
<%= t('.another_check') %>
<% end %>
<% if final_check.any? %>
<%= t('.final_check') %>
<% end %>
Given the above ERB, I get the below for the Ruby tree.
As you can see, only the last `if` statement is correctly parsed. This continues if I introduce other `if` blocks afterwards: only the last one is ever parsed correctly.
Now the previously correct block is wrong.
What makes me think this is probably not a grammar issue is what happens when I run the following Node code:
const Parser = require('tree-sitter');
const EmbeddedTemplate = require('tree-sitter-embedded-template');
const HTML = require('tree-sitter-html');
const Ruby = require('tree-sitter-ruby');
/**
 * Copy the extent of a syntax node into a plain range object with the
 * four fields passed to the parser's `includedRanges` option.
 *
 * @param {object} node - Syntax node exposing start/end index and position.
 * @returns {{startIndex: number, endIndex: number, startPosition: object, endPosition: object}}
 */
function nodeToRange(node) {
  const { startIndex, endIndex, startPosition, endPosition } = node;
  return { startIndex, endIndex, startPosition, endPosition };
}

/**
 * Parse only the given ranges of `text` with a fresh parser for `language`.
 *
 * @param {object} language - Tree-sitter language to parse with.
 * @param {string} text - The full template source.
 * @param {object[]} ranges - Ranges of `text` that belong to `language`.
 * @returns {object} The resulting syntax tree.
 */
function parseIncludedRanges(language, text, ranges) {
  const parser = new Parser();
  parser.setLanguage(language);
  if (ranges.length === 0) {
    // Tree-sitter treats an empty included-range list as "parse the entire
    // document", which would feed the whole template to this language.
    // Parse an empty string instead so the result is an empty tree.
    return parser.parse('');
  }
  // Parse from scratch: there is no previously edited tree to reuse.
  return parser.parse(text, undefined, { includedRanges: ranges });
}

/**
 * Parse an ERB template three ways: as an embedded template, then the
 * `content` sections as HTML and the `code` sections as Ruby.
 *
 * @param {string} text - The ERB template source.
 * @returns {{erb: string, html: string, ruby: string}} S-expressions of the
 *   three syntax trees.
 */
function parseTemplateFile(text) {
  // Parse the entire text as ERB first.
  const parser = new Parser();
  parser.setLanguage(EmbeddedTemplate);
  const erbRootNode = parser.parse(text).rootNode;

  const htmlRanges = [];
  const rubyRanges = [];

  // Walk the top-level nodes and sort them into HTML and Ruby ranges.
  for (let i = 0; i < erbRootNode.childCount; i++) {
    const node = erbRootNode.child(i);
    if (node.type === 'content') {
      htmlRanges.push(nodeToRange(node));
      continue;
    }
    // Only `code` children are Ruby. Other named children (such as the
    // `comment` inside `<%# ... %>`) must not be handed to the Ruby parser.
    const codeNode = node.namedChildren.find((child) => child.type === 'code');
    if (codeNode) {
      rubyRanges.push(nodeToRange(codeNode));
    }
  }

  const htmlRootNode = parseIncludedRanges(HTML, text, htmlRanges).rootNode;
  const rubyRootNode = parseIncludedRanges(Ruby, text, rubyRanges).rootNode;

  // Return the S-expressions for all three trees.
  return {
    erb: erbRootNode.toString(),
    html: htmlRootNode.toString(),
    ruby: rubyRootNode.toString(),
  };
}
// Script entry point: when this file is executed directly, parse the
// template passed as the first command-line argument and print each tree.
if (require.main === module) {
  const [, , text] = process.argv;

  if (!text) {
    console.error('Please provide a text string as an argument');
    process.exit(1);
  }

  try {
    const { erb, html, ruby } = parseTemplateFile(text);
    console.log('ERB:', erb);
    console.log('HTML:', html);
    console.log('Ruby:', ruby);
  } catch (error) {
    console.error('Error parsing template:', error);
    process.exit(1);
  }
}

// Also expose the parser function to other modules.
module.exports = parseTemplateFile;
I get the correct output.
$ node index.js '<% if check.none? %>
<h2><%= t('\''.no_check'\'') %></h2>
<% end %>
<% if another_check.any? %>
<div>
<%= t('\''.another_check'\'') %>
</div>
<% end %>
<% if final_check.any? %>
<%= render '\''table'\'' %>
<% end %>
'
ERB: (template (directive (code)) (content) (output_directive (code)) (content) (directive (code)) (content) (directive (code)) (content) (output_directive (code)) (content) (directive (code)) (content) (directive (code)) (content) (output_directive (code)) (content) (directive (code)) (content))
HTML: (document (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))))
Ruby: (program (if condition: (call receiver: (identifier) method: (identifier)) consequence: (then (call method: (identifier) arguments: (argument_list (string (string_content)))))) (if condition: (call receiver: (identifier) method: (identifier)) consequence: (then (call method: (identifier) arguments: (argument_list (string (string_content)))))) (if condition: (call receiver: (identifier) method: (identifier)) consequence: (then (call method: (identifier) arguments: (argument_list (string (string_content)))))))
So I'm stumped, and while this is seemingly somehow an Emacs issue, I'm hoping that you can maybe shed some light.
Thanks and apologies again if this is indeed not a grammar problem.