tree-sitter-ruby icon indicating copy to clipboard operation
tree-sitter-ruby copied to clipboard

bug: leading method comment is outside method body

Open adamcohen2 opened this issue 1 year ago • 1 comments

Did you check existing issues?

  • [X] I have read all the tree-sitter docs if it relates to using the parser
  • [X] I have searched the existing issues of tree-sitter-c

Tree-Sitter CLI Version, if relevant (output of tree-sitter --version)

tree-sitter 0.24.4 (fc8c1863e2e5724a0c40bb6e6cfc8631bfe5908b)

Describe the bug

When calling NextSibling on a leading method comment, the entire method body is returned, instead of only the next statement.

For example, given the following ruby code:

def foo
  # comment1
  puts "statement 1"
  puts "statement 2"
  # comment2
  puts "statement 3"
end

foo

If a node is pointing to # comment1 and I call node.NextSibling(), I would expect this node to be: puts "statement 1", however, it's actually the entire function body:

  puts "statement 1"
  puts "statement 2"
  # comment2
  puts "statement 3"

Is this a bug, or is this by design? I ask because tree-sitter-go doesn't behave this way: it returns the single-line statement following the comment node, as expected:

Click to expand example code
package main

import (
	"fmt"

	tree_sitter "github.com/tree-sitter/go-tree-sitter"
	tree_sitter_go "github.com/tree-sitter/tree-sitter-go/bindings/go"
	tree_sitter_ruby "github.com/tree-sitter/tree-sitter-ruby/bindings/go"
)

// main runs the reproduction: for a Ruby fixture and an equivalent Go fixture
// it prints the next sibling of the "comment1" and "comment2" comment nodes,
// Ruby first and Go second, so the two grammars can be compared side by side.
func main() {
	// Ruby fixture: a method whose body starts with a comment.
	rubyFixture := []byte(`
def foo
  # comment1
  puts "statement 1"
  puts "statement 2"
  # comment2
  puts "statement 3"
end

foo
	`)

	// Go fixture with the same statement/comment layout inside a function.
	goFixture := []byte(`
package main

import "fmt"

func foo() {
	// comment1
	fmt.Println("statement 1")
	fmt.Println("statement 2")
	// comment2
	fmt.Println("statement 3")
}

func main() {
	foo()
}
`)

	// The same two comment markers are looked up in both fixtures.
	markers := []string{"comment1", "comment2"}

	rubyGrammar := tree_sitter.NewLanguage(tree_sitter_ruby.Language())
	for _, marker := range markers {
		OutputCommentAndStatement(marker, "#", "ruby", rubyFixture, rubyGrammar)
	}

	goGrammar := tree_sitter.NewLanguage(tree_sitter_go.Language())
	for _, marker := range markers {
		OutputCommentAndStatement(marker, "//", "golang", goFixture, goGrammar)
	}
}

// OutputCommentAndStatement parses sourceCode with language, finds every
// comment node whose text matches the pattern "^<commentSymbol> <comment>",
// and prints the source text of that comment node's next sibling.
//
// comment and commentSymbol are interpolated into the query's #match? pattern
// without escaping, so they must not contain regular-expression
// metacharacters. langName is used only to label the printed output.
//
// Setup failures (language rejected by the parser, no tree produced, query
// that does not compile) are reported on standard output and end the call
// early instead of continuing with a nil value.
func OutputCommentAndStatement(comment, commentSymbol, langName string, sourceCode []byte, language *tree_sitter.Language) {
	parser := tree_sitter.NewParser()
	defer parser.Close()
	if err := parser.SetLanguage(language); err != nil {
		fmt.Printf("setting %s language: %v\n", langName, err)
		return
	}

	tree := parser.Parse(sourceCode, nil)
	if tree == nil {
		fmt.Printf("parsing %s source: no tree produced\n", langName)
		return
	}
	defer tree.Close()

	rawQuery := fmt.Sprintf(`((comment) @comment (#match? @comment "^%s %s"))`, commentSymbol, comment)

	query, err := tree_sitter.NewQuery(language, rawQuery)
	if err != nil {
		fmt.Printf("compiling query %q: %v\n", rawQuery, err)
		return
	}
	defer query.Close()

	qc := tree_sitter.NewQueryCursor()
	defer qc.Close()

	captures := qc.Captures(query, tree.RootNode(), sourceCode)

	for match, index := captures.Next(); match != nil; match, index = captures.Next() {
		node := match.Captures[index].Node

		// A comment that is the last child of its parent has no next sibling;
		// print a placeholder rather than dereferencing a nil node.
		text := "<no next sibling>"
		if sibling := node.NextSibling(); sibling != nil {
			text = sibling.Utf8Text(sourceCode)
		}

		fmt.Printf("languge: %s\n", langName)
		fmt.Printf("sibling node contents for %s\n", comment)
		fmt.Println("\nBEGIN STATEMENT")
		fmt.Printf("%s\n", text)
		// Print (not Println) with two explicit newlines keeps the original
		// blank line after each record without a redundant-newline vet warning.
		fmt.Print("END STATEMENT\n\n")
	}
}

output:

languge: ruby
sibling node contents for comment1

BEGIN STATEMENT
puts "statement 1"
  puts "statement 2"
  # comment2
  puts "statement 3"
END STATEMENT

languge: ruby
sibling node contents for comment2

BEGIN STATEMENT
puts "statement 3"
END STATEMENT

languge: golang
sibling node contents for comment1

BEGIN STATEMENT
fmt.Println("statement 1")
END STATEMENT

languge: golang
sibling node contents for comment2

BEGIN STATEMENT
fmt.Println("statement 3")
END STATEMENT

Steps To Reproduce/Bad Parse Tree

Input code:

def foo
  # comment1
  puts "statement 1"
  puts "statement 2"
  # comment2
  puts "statement 3"
end

Parse tree. Leading method comment is unexpectedly outside method body:

(program [0, 0] - [6, 3]
  (method [0, 0] - [6, 3]
    name: (identifier [0, 4] - [0, 7])
    (comment [1, 2] - [1, 12])
    body: (body_statement [2, 2] - [5, 20]
      (call [2, 2] - [2, 20]
      <snip>

Expected Behavior/Parse Tree

Expected parse tree, leading method comment should be inside method body:

(program [0, 0] - [6, 3]
  (method [0, 0] - [6, 3]
    name: (identifier [0, 4] - [0, 7])
    body: (body_statement [2, 2] - [5, 20]
      (comment [1, 2] - [1, 12])
      (call [2, 2] - [2, 20]
      <snip>

Repro

def foo
  # comment1
  puts "statement 1"
  puts "statement 2"
  # comment2
  puts "statement 3"
end

adamcohen2 avatar Nov 20 '24 06:11 adamcohen2