tree-sitter-embedded-template icon indicating copy to clipboard operation
tree-sitter-embedded-template copied to clipboard

Simple template only properly parses the last `if` block

Open elken opened this issue 1 year ago • 0 comments

Hi all, apologies if this is the wrong place, but I'm struggling to understand exactly where the line of demarcation for this problem lies.

I'm building an Emacs mode using tree-sitter-embedded-template for ERB and I've hit an annoying snag.

Mode code below if anyone wants to help reproduce, but you'll need a recent version of Emacs to get html-ts-mode:

(require 'treesit)
(require 'ruby-ts-mode)
(require 'html-ts-mode)
(require 'sgml-mode)

(defvar erb-ts-mode--range-settings
  (treesit-range-rules
   ;; Ruby is embedded in the `code' nodes of the embedded-template tree.
   :host 'embedded-template
   :embed 'ruby
   '((code) @capture)

   ;; HTML is embedded in the `content' nodes of the embedded-template tree.
   :host 'embedded-template
   :embed 'html
   '((content) @capture))
  "Range rules installed as `treesit-range-settings' by `erb-ts-setup'.
The `embedded-template' language is the host for both rules: nodes
matching (code) are embedded as `ruby' and nodes matching (content)
are embedded as `html'.")

(defun erb-ts-setup ()
  "Prepare the current buffer's tree-sitter configuration for `erb-ts-mode'.
Buffer-locally sets `electric-pair-pairs' and `treesit-range-settings'
\(the latter from `erb-ts-mode--range-settings'), then calls
`treesit-major-mode-setup'."
  (setq-local
   ;; Delimiter pairs, including the `<' `>' and `%' `%' used by ERB tags.
   electric-pair-pairs '((?\< . ?\>)
                         (?\% . ?\%)
                         (?\{ . ?\})
                         (?\( . ?\))
                         (?\[ . ?\])
                         (?\' . ?\')
                         (?\" . ?\"))
   ;; Embedded-language ranges for the Ruby and HTML parsers.
   treesit-range-settings erb-ts-mode--range-settings)
  (treesit-major-mode-setup))

;;;###autoload
(define-derived-mode erb-ts-mode html-mode "ERB[ts]"
  "Major mode for editing ERB with tree-sitter.

Signals an error unless the tree-sitter grammars for Ruby, HTML and
embedded-template are all available."
  :syntax-table html-mode-syntax-table

  ;; Fail fast on the first missing grammar.  Each entry pairs the
  ;; tree-sitter language symbol with the name shown in the error message.
  (dolist (grammar '((ruby . "Ruby")
                     (html . "HTML")
                     (embedded-template . "ERB")))
    (unless (treesit-ready-p (car grammar))
      (error "Tree-sitter grammar for %s isn't available" (cdr grammar))))

  ;; Every grammar is known to be ready at this point, so the host parser
  ;; can be created without a further `treesit-ready-p' guard.
  (setq-local treesit-primary-parser (treesit-parser-create 'embedded-template))
  (erb-ts-setup))
<% if check.none? %>
  <%= t('.no_check') %>
<% end %>
<% if another_check.any? %>
  <%= t('.another_check') %>
<% end %>
<% if final_check.any? %>
  <%= t('.final_check') %>
<% end %>

Given the above ERB, I get the below for the Ruby tree.

image

As you can see, only the last `if` statement is correctly parsed. This continues if I introduce other `if` blocks afterwards: only the last one is correctly parsed.

image

Now the previously correct block is wrong.

What makes me think this is probably not a grammar issue is what happens when I run the following Node code:

const Parser = require('tree-sitter');
const EmbeddedTemplate = require('tree-sitter-embedded-template');
const HTML = require('tree-sitter-html');
const Ruby = require('tree-sitter-ruby');

// ERB node types whose `code` child is Ruby source. Other directive kinds
// (for example comment directives, whose body is a `comment` node) are
// deliberately excluded so their text is never handed to the Ruby parser.
const RUBY_DIRECTIVE_TYPES = new Set(['directive', 'output_directive']);

/**
 * Build a tree-sitter range object covering exactly one syntax node.
 *
 * @param {object} node - A tree-sitter syntax node.
 * @returns {{startIndex: number, endIndex: number, startPosition: object, endPosition: object}}
 */
function nodeRange(node) {
  const { startIndex, endIndex, startPosition, endPosition } = node;
  return { startIndex, endIndex, startPosition, endPosition };
}

/**
 * Parse only the given ranges of `text` with `language` and return the
 * resulting tree as an S-expression.
 *
 * @param {object} language - A tree-sitter language (e.g. HTML or Ruby).
 * @param {string} text - The full template source the ranges index into.
 * @param {object[]} ranges - Ranges produced by `nodeRange`.
 * @returns {string} S-expression of the parsed root node.
 */
function parseRangesToSexp(language, text, ranges) {
  const parser = new Parser();
  parser.setLanguage(language);

  // An empty range list tells tree-sitter to parse the WHOLE document, which
  // would treat the entire template as this language. When there is nothing
  // to parse, parse an empty document instead.
  const tree =
    ranges.length > 0
      ? parser.parse(text, undefined, { includedRanges: ranges })
      : parser.parse('');

  return tree.rootNode.toString();
}

/**
 * Parse an ERB template with the embedded-template grammar, then parse its
 * `content` sections as HTML and the `code` sections of its (output)
 * directives as Ruby.
 *
 * @param {string} text - The full ERB template source.
 * @returns {{erb: string, html: string, ruby: string}} The S-expression of
 *   each of the three trees.
 */
function parseTemplateFile(text) {
  // Parse the entire text as ERB first
  const erbParser = new Parser();
  erbParser.setLanguage(EmbeddedTemplate);
  const erbTree = erbParser.parse(text);
  const erbRootNode = erbTree.rootNode;

  // Split the top-level ERB nodes into HTML ranges and Ruby ranges
  const htmlRanges = [];
  const rubyRanges = [];

  for (const node of erbRootNode.children) {
    if (node.type === 'content') {
      htmlRanges.push(nodeRange(node));
      continue;
    }

    if (!RUBY_DIRECTIVE_TYPES.has(node.type)) {
      continue;
    }

    // A directive may be empty (`<% %>`), in which case it has no `code` child.
    const codeNode = node.namedChildren.find((child) => child.type === 'code');
    if (codeNode) {
      rubyRanges.push(nodeRange(codeNode));
    }
  }

  // Return the S-expressions for all three trees
  return {
    erb: erbRootNode.toString(),
    html: parseRangesToSexp(HTML, text, htmlRanges),
    ruby: parseRangesToSexp(Ruby, text, rubyRanges),
  };
}

// Command line usage: node index.js '<template source>'
if (require.main === module) {
  // argv[0] is the node binary and argv[1] the script path; the template
  // source is the first user-supplied argument.
  const [, , text] = process.argv;

  if (!text) {
    console.error('Please provide a text string as an argument');
    process.exit(1);
  }

  try {
    const { erb, html, ruby } = parseTemplateFile(text);
    const report = [
      ['ERB:', erb],
      ['HTML:', html],
      ['Ruby:', ruby],
    ];
    for (const [label, sexp] of report) {
      console.log(label, sexp);
    }
  } catch (error) {
    console.error('Error parsing template:', error);
    process.exit(1);
  }
}

module.exports = parseTemplateFile;

I get the correct output.

$ node index.js '<% if check.none? %>
  <h2><%= t('\''.no_check'\'') %></h2>
<% end %>
<% if another_check.any? %>
  <div>
    <%= t('\''.another_check'\'') %>
  </div>
<% end %>
<% if final_check.any? %>
  <%= render '\''table'\'' %>
<% end %>
'
ERB: (template (directive (code)) (content) (output_directive (code)) (content) (directive (code)) (content) (directive (code)) (content) (output_directive (code)) (content) (directive (code)) (content) (directive (code)) (content) (output_directive (code)) (content) (directive (code)) (content))
HTML: (document (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))))
Ruby: (program (if condition: (call receiver: (identifier) method: (identifier)) consequence: (then (call method: (identifier) arguments: (argument_list (string (string_content)))))) (if condition: (call receiver: (identifier) method: (identifier)) consequence: (then (call method: (identifier) arguments: (argument_list (string (string_content)))))) (if condition: (call receiver: (identifier) method: (identifier)) consequence: (then (call method: (identifier) arguments: (argument_list (string (string_content)))))))

So I'm stumped, and while this is seemingly somehow an Emacs issue, I'm hoping that you can maybe shed some light.

Thanks and apologies again if this is indeed not a grammar problem.

elken avatar Jan 07 '25 11:01 elken