javalang
javalang copied to clipboard
Extracting each method in a .java file
Hi. I want to find out each method in a java file, then get the method's code.
I appreciate if you can help me with this.
+1
Actually, what I'm looking for is a way to get the code corresponding to a non-terminal in the AST. Any hints?
I could use for path, node in tree.filter(javalang.tree.MethodDeclaration): ... to find all method declarations, but I can't find any API to recover a sub-tree into corresponding source code.
I am having the same problem. I am using this code snippet to extract the string of a node:
def __get_string_for_node(self, node: javalang.parser.tree.Node):
    """Return the source text that belongs to ``node``.

    The node's start/end positions are looked up first and the file content
    between them is then sliced out; both steps live in sibling helpers.
    """
    span = self.__get_start_end_for_node(node)
    return self.__get_string(*span)
def __get_start_end_for_node(self, node_to_find: javalang.parser.tree.Node) -> Tuple[javalang.tokenizer.Position, javalang.tokenizer.Position]:
    """Locate where ``node_to_find`` starts and where the code after it begins.

    Walks ``self.compilation_unit`` (an iterable of ``(path, node)`` pairs).
    The start is the position of ``node_to_find`` itself; the end is the
    position of the first later node that is not part of its subtree.

    Returns:
        ``(start, end)``. ``start`` is None when the node is not found or has
        no position. ``end`` is None when nothing with a position follows the
        node, i.e. it extends to the end of the file.
    """
    start = None
    for path, node in self.compilation_unit:
        if start is None:
            # Still searching for the node itself.
            if node == node_to_find:
                start = node.position
            continue
        if node_to_find in path:
            # Descendant of the node we are measuring; still inside it.
            continue
        # Nodes without a position cannot serve as the end marker. Returning
        # None for them would make the caller copy everything up to the end
        # of the file, so keep looking for the next node that has one.
        if node.position is not None:
            return start, node.position
    return start, None
def __get_string(self, start: javalang.tokenizer.Position, end: javalang.tokenizer.Position):
    """Return the part of ``self.file_content`` between ``start`` and ``end``.

    Positions are 1-based (line 1 is ``lines[0]``, column 1 is index 0).
    ``start`` is inclusive and ``end`` is exclusive.

    Args:
        start: Where the text begins; None yields an empty string.
        end: Where the text stops; None means "up to the end of the file".
    """
    if start is None:
        return ""

    lines = self.file_content.splitlines(True)
    first = start.line - 1
    head = lines[first][start.column - 1:]

    if end is None:
        return head + "".join(lines[first + 1:])

    last = end.line - 1
    if last == first:
        # Both positions sit on the same line: slice that line once.
        # Appending the line's prefix to its suffix would duplicate text.
        return lines[first][start.column - 1:end.column - 1]

    return head + "".join(lines[first + 1:last]) + lines[last][:end.column - 1]
This, however, has quite a few problems: it ignores the method's own annotations and modifiers, and it may copy code that follows the method, such as annotations, modifiers, and comments, so some post-processing is needed to clean this up again.
Can someone please tell me how I can get the code of all methods present in a .java file? For example, if the code is:
`
/**
 * Appends a new node holding {@code data} to the end of the list.
 * When the list is empty (head is null) the new node becomes both head and
 * tail; otherwise it is linked after the current tail and becomes the tail.
 *
 * @param data value stored in the newly created node
 */
public void addNode(int data) {
//Create a new node
Node newNode = new Node(data);
//Checks if the list is empty
if(head == null) {
//If list is empty, both head and tail will point to new node
head = newNode;
tail = newNode;
}
else {
//newNode will be added after tail such that tail's next will point to newNode
tail.next = newNode;
//newNode will become new tail of the list
tail = newNode;
}
}
/**
 * Prints the list to standard output.
 * Prints "List is empty" and returns when head is null; otherwise prints a
 * header line followed by every node's data from head onward, separated by
 * spaces, and a final line break.
 */
public void display() {
Node current = head;
if(head == null) {
System.out.println("List is empty");
return;
}
System.out.println("Nodes of singly linked list: ");
while(current != null) {
System.out.print(current.data + " ");
current = current.next;
}
System.out.println();
}
I need the following output:
{method 1: public void addNode(int data) { ......}
method 2: public void display(){....}
}
`
Referring to the code of @pfehrmann , I wrote a demo.
Suppose there are three methods in the file Main.java :
public class Main {
public static void main(String[] args) {
System.out.println("hello world");
}
public void foo() {
System.out.println("foo");
}
public void bar() {
System.out.println("bar");
}
}
My code is :
import javalang as jl
def __get_start_end_for_node(node_to_find):
    """Locate where ``node_to_find`` starts and where the code after it begins.

    Walks the module-level ``tree`` (an iterable of ``(path, node)`` pairs).
    The start is the position of ``node_to_find`` itself; the end is the
    position of the first later node that is not part of its subtree.

    Returns:
        ``(start, end)``. ``start`` is None when the node is not found or has
        no position. ``end`` is None when nothing with a position follows the
        node, i.e. it extends to the end of the file.
    """
    start = None
    for path, node in tree:
        if start is None:
            # Still searching for the node itself.
            if node == node_to_find:
                start = node.position
            continue
        # A node outside the subtree only works as an end marker when it has
        # a position. Skipping positionless nodes avoids reporting "runs to
        # the end of the file" for a method that is followed by more code.
        if node_to_find not in path and node.position is not None:
            return start, node.position
    return start, None
def __get_string(start, end):
    """Return the lines of the module-level ``data`` that hold one method.

    The text starts with the whole line of ``start`` (so leading modifiers
    and indentation are kept) and stops before the line of ``end``.

    Args:
        start: Position where the method begins; None yields "".
        end: Position of the first node after the method, or None when the
            method runs to the end of the file.
    """
    if start is None:
        return ""

    # positions are all offset by 1. e.g. first line -> lines[0], start.line = 1
    stop = None if end is None else end.line - 1
    lines = data.splitlines(True)
    string = lines[start.line - 1] + "".join(lines[start.line:stop])

    # The slice can include closing braces of enclosing classes: one for the
    # last method of a file, more for the last method of an inner class.
    # Drop every surplus "}" rather than at most one, and do so even when
    # more code follows the enclosing class.
    surplus = string.count("}") - string.count("{")
    for _ in range(max(surplus, 0)):
        string = string[:string.rfind("}")]
    return string
if __name__ == '__main__':
    # `data` and `tree` are read as module globals by the helper functions,
    # so they are bound here before any helper is called. The context
    # manager closes the file handle instead of leaving it to the garbage
    # collector.
    with open("./Main.java") as source_file:
        data = source_file.read()
    tree = jl.parse.parse(data)

    # Keyed by method name: a later method with the same name (an overload)
    # replaces the earlier entry.
    methods = {}
    for _, node in tree.filter(jl.tree.MethodDeclaration):
        start, end = __get_start_end_for_node(node)
        methods[node.name] = __get_string(start, end)
    print(methods)
Run the code and get:
{'main': ' public static void main(String[] args) {\n System.out.println("hello world");\n }\n\n', 'foo': ' public void foo() {\n System.out.println("foo");\n }\n\n', 'bar': ' public void bar() {\n System.out.println("bar");\n }\n'}
Hope this helps you :) #
This seems to work well! Note that methods is a dictionary, so if you want to see properly formatted output, use this code instead of print(methods):
# Print every extracted method body on its own instead of the dict's repr.
for method_text in methods.values():
    print(method_text)
Here's an extended version of @pfehrmann and @fgksgf 's code.
What's new?
- It includes accurate startlines and endlines.
- This should also include annotations and modifiers.
import javalang
def get_method_start_end(method_node):
    """Find where ``method_node`` starts and where the code after it begins.

    Walks the module-level ``tree`` (an iterable of ``(path, node)`` pairs).

    Returns:
        ``(startpos, endpos, startline, endline)``. ``startpos`` is the
        position of ``method_node``; ``endpos`` is the position of the first
        later node outside its subtree that has a position. The ``*line``
        values are the ``.line`` of those positions. Every value that could
        not be determined is None; ``endpos`` is None when the method runs to
        the end of the file.
    """
    startpos = endpos = None
    for path, node in tree:
        if startpos is None:
            # Still searching for the method node itself.
            if node == method_node:
                startpos = node.position
            continue
        # Stopping at a node without a position would report "no end" and
        # make the caller slice to the end of the file, so skip such nodes
        # until one with a position turns up.
        if method_node not in path and node.position is not None:
            endpos = node.position
            break

    startline = startpos.line if startpos is not None else None
    endline = endpos.line if endpos is not None else None
    return startpos, endpos, startline, endline
def get_method_text(startpos, endpos, startline, endline, last_endline_index):
    """Extract one method's source text from the module-level ``codelines``.

    Args:
        startpos: Position of the method node, or None when it was not found.
        endpos: Position of the first node after the method, or None when the
            method runs to the end of the file.
        startline: 1-based line number of ``startpos``.
        endline: 1-based line number of ``endpos`` (unused when ``endpos`` is
            None).
        last_endline_index: 0-based index of the last line of the previously
            extracted method, or None on the first call.

    Returns:
        ``(text, first_line, last_line, last_line_index)``. The two line
        numbers are 1-based; ``last_line_index`` is 0-based and is meant to be
        passed to the next call. ``("", None, None, None)`` when ``startpos``
        is None.
    """
    if startpos is None:
        return "", None, None, None

    def clip(text):
        # Keep everything up to the last "}". A body-less (abstract or
        # interface) method has none, so fall back to the ";" that ends its
        # declaration instead of discarding the whole text.
        cut = text.rfind("}")
        if cut == -1:
            cut = text.rfind(";")
        return text[:cut + 1]

    startline_index = startline - 1
    endline_index = endline - 1 if endpos is not None else None

    # 1. Pull in annotation lines directly above the method. Only lines that
    #    start with "@" count, so Javadoc tags such as " * @param" are not
    #    mistaken for annotations. The search never reaches back into the
    #    previously extracted method, and it also runs for the first method.
    floor = 0 if last_endline_index is None else last_endline_index + 1
    while (startline_index > floor
           and codelines[startline_index - 1].lstrip().startswith("@")):
        startline_index -= 1

    # NUL marks the line joints so the number of surviving lines can be
    # recovered after trimming. Unlike a printable marker, it is not expected
    # to occur in Java source text.
    line_sep = "\x00"
    meth_text = clip(line_sep.join(codelines[startline_index:endline_index]))

    # 2. Remove closing braces of enclosing classes and anything after them.
    #    Only a surplus of "}" is trimmed. When "{" outnumber "}" (e.g. a
    #    brace inside a string literal), stripping would eat the method's own
    #    closing brace.
    surplus = meth_text.count("}") - meth_text.count("{")
    for _ in range(max(surplus, 0)):
        meth_text = clip(meth_text[:meth_text.rfind("}")])

    meth_lines = meth_text.split(line_sep)
    last_endline_index = startline_index + (len(meth_lines) - 1)
    return "".join(meth_lines), (startline_index + 1), (last_endline_index + 1), last_endline_index
if __name__ == "__main__":
    target_file = "./Main.java" # Add the path to your file

    # get_method_start_end() and get_method_text() read `tree` and
    # `codelines` as module globals, so both are bound before the loop runs.
    with open(target_file, 'r') as source_file:
        codelines = source_file.readlines()
    code_text = ''.join(codelines)
    tree = javalang.parse.parse(code_text)

    methods = {}
    lex = None  # end line index of the previously extracted method
    for _, method_node in tree.filter(javalang.tree.MethodDeclaration):
        method_text, startline, endline, lex = get_method_text(
            *get_method_start_end(method_node), lex
        )
        methods[method_node.name] = method_text

        print("--- ### ---")
        print(method_text)
        print("Line range: ", startline, " - ", endline)
        print("--- ### ---")
@anjandash Thank you! it was helpful :)
@anjandash When there are internal classes in the file, the code seems to have some exceptions...(There seems to be some problems with the endline)
Hi @Sichengluis, thanks for letting me know! Can you please post an example here from the cases you encountered?
@anjandash Hi, thanks for your reply! I'm actually using your code to parse the source code of Ant. There are some problems when parsing the file projecthelper 2, because it involves the internal class. When a method is at the end of the internal class (such as the endprefixmapping method), the code sometimes can't correctly find the end line of the code. The code of this file is as follows.
Hi @anjandash, I am using your code and I encounter the same exception mentioned by @Sichengluis: the code cannot find the end line of methods in internal classes. If you have looked into the issue and found a solution, I'd be very interested.
What I am doing currently is manually parsing the text and stopping after the brace that closes the method.
How can I get the method signature using this code snippet? The output should be something like this,
"packageName.className.methodName : methodSignature : lineNumber"
For example something like this:
org.apache.commons.lang.NumberUtils:<init>()V:47
org.apache.commons.lang.NumberUtils:<init>()V:48
org.apache.commons.lang.NumberUtils:stringToInt(Ljava/lang/String;)I:61
org.apache.commons.lang.NumberUtils:stringToInt(Ljava/lang/String;I)I:74
org.apache.commons.lang.NumberUtils:stringToInt(Ljava/lang/String;I)I:75
org.apache.commons.lang.NumberUtils:stringToInt(Ljava/lang/String;I)I:76
Here's a version of @anjandash 's code that should support inner class.
import javalang
def get_method_start_end(method_node):
    """Find where ``method_node`` starts and where the code after it begins.

    Walks the module-level ``tree`` (an iterable of ``(path, node)`` pairs).

    Returns:
        ``(startpos, endpos, startline, endline)``. ``startpos`` is the
        position of ``method_node``; ``endpos`` is the position of the first
        later node outside its subtree that has a position. The ``*line``
        values are the ``.line`` of those positions. Every value that could
        not be determined is None; ``endpos`` is None when the method runs to
        the end of the file.
    """
    startpos = endpos = None
    for path, node in tree:
        if startpos is None:
            # Still searching for the method node itself.
            if node == method_node:
                startpos = node.position
            continue
        # Stopping at a node without a position would report "no end" and
        # make the caller slice to the end of the file, so skip such nodes
        # until one with a position turns up.
        if method_node not in path and node.position is not None:
            endpos = node.position
            break

    startline = startpos.line if startpos is not None else None
    endline = endpos.line if endpos is not None else None
    return startpos, endpos, startline, endline
def get_method_text(startpos, endpos, startline, endline, last_endline_index):
    """Extract one method's source text from the module-level ``codelines``.

    Args:
        startpos: Position of the method node, or None when it was not found.
        endpos: Position of the first node after the method, or None when the
            method runs to the end of the file.
        startline: 1-based line number of ``startpos``.
        endline: 1-based line number of ``endpos`` (unused when ``endpos`` is
            None).
        last_endline_index: 0-based index of the last line of the previously
            extracted method, or None on the first call.

    Returns:
        ``(text, first_line, last_line, last_line_index)``. The two line
        numbers are 1-based; ``last_line_index`` is 0-based and is meant to be
        passed to the next call. ``("", None, None, None)`` when ``startpos``
        is None.
    """
    if startpos is None:
        return "", None, None, None

    def clip(text):
        # Keep everything up to the last "}". A body-less (abstract or
        # interface) method has none, so fall back to the ";" that ends its
        # declaration instead of discarding the whole text.
        cut = text.rfind("}")
        if cut == -1:
            cut = text.rfind(";")
        return text[:cut + 1]

    startline_index = startline - 1
    endline_index = endline - 1 if endpos is not None else None

    # 1. Pull in annotation lines directly above the method. Only lines that
    #    start with "@" count, so Javadoc tags such as " * @param" are not
    #    mistaken for annotations. The search never reaches back into the
    #    previously extracted method, and it also runs for the first method.
    floor = 0 if last_endline_index is None else last_endline_index + 1
    while (startline_index > floor
           and codelines[startline_index - 1].lstrip().startswith("@")):
        startline_index -= 1

    # NUL marks the line joints so the number of surviving lines can be
    # recovered after trimming. Unlike a printable marker, it is not expected
    # to occur in Java source text.
    line_sep = "\x00"
    meth_text = clip(line_sep.join(codelines[startline_index:endline_index]))

    # 2. Remove closing braces of enclosing (inner/outer) classes and anything
    #    after them. Only a surplus of "}" is trimmed. When "{" outnumber "}"
    #    (e.g. a brace inside a string literal), stripping would eat the
    #    method's own closing brace.
    surplus = meth_text.count("}") - meth_text.count("{")
    for _ in range(max(surplus, 0)):
        meth_text = clip(meth_text[:meth_text.rfind("}")])

    meth_lines = meth_text.split(line_sep)
    last_endline_index = startline_index + (len(meth_lines) - 1)
    return "".join(meth_lines), (startline_index + 1), (last_endline_index + 1), last_endline_index
if __name__ == "__main__":
    target_file = "./Main.java" # Add the path to your file

    # get_method_start_end() and get_method_text() read `tree` and
    # `codelines` as module globals, so both are bound before the loop runs.
    with open(target_file, 'r') as source_file:
        codelines = source_file.readlines()
    code_text = ''.join(codelines)
    tree = javalang.parse.parse(code_text)

    methods = {}
    lex = None  # end line index of the previously extracted method
    for _, method_node in tree.filter(javalang.tree.MethodDeclaration):
        method_text, startline, endline, lex = get_method_text(
            *get_method_start_end(method_node), lex
        )
        methods[method_node.name] = method_text

        print("--- ### ---")
        print(method_text)
        print("Line range: ", startline, " - ", endline)
        print("--- ### ---")
Here's a version of @anjandash 's code that should support inner class.
import javalang


def get_method_start_end(method_node):
    """Find where ``method_node`` starts and where the code after it begins.

    Walks the module-level ``tree``. Returns ``(startpos, endpos, startline,
    endline)``; values that could not be determined are None, and ``endpos``
    is None when the method runs to the end of the file.
    """
    startpos = endpos = None
    for path, node in tree:
        if startpos is None:
            if node == method_node:
                startpos = node.position
            continue
        # Skip nodes without a position: using one as the end marker would
        # make the caller slice to the end of the file.
        if method_node not in path and node.position is not None:
            endpos = node.position
            break

    startline = startpos.line if startpos is not None else None
    endline = endpos.line if endpos is not None else None
    return startpos, endpos, startline, endline


def get_method_text(startpos, endpos, startline, endline, last_endline_index):
    """Extract one method's source text from the module-level ``codelines``.

    ``startline``/``endline`` are 1-based; ``last_endline_index`` is the
    0-based last line of the previously extracted method (None on the first
    call). Returns ``(text, first_line, last_line, last_line_index)``, or
    ``("", None, None, None)`` when ``startpos`` is None.
    """
    if startpos is None:
        return "", None, None, None

    def clip(text):
        # Cut after the last "}"; body-less methods fall back to their ";".
        cut = text.rfind("}")
        if cut == -1:
            cut = text.rfind(";")
        return text[:cut + 1]

    startline_index = startline - 1
    endline_index = endline - 1 if endpos is not None else None

    # 1. check for and fetch annotations: only lines starting with "@" that
    #    sit directly above the method (Javadoc "@param" lines do not count).
    floor = 0 if last_endline_index is None else last_endline_index + 1
    while (startline_index > floor
           and codelines[startline_index - 1].lstrip().startswith("@")):
        startline_index -= 1

    # NUL marks the line joints so the line count survives the trimming.
    line_sep = "\x00"
    meth_text = clip(line_sep.join(codelines[startline_index:endline_index]))

    # 2. remove trailing rbrace for last methods & any external
    #    content/comments; only a surplus of "}" is trimmed.
    surplus = meth_text.count("}") - meth_text.count("{")
    for _ in range(max(surplus, 0)):
        meth_text = clip(meth_text[:meth_text.rfind("}")])

    meth_lines = meth_text.split(line_sep)
    last_endline_index = startline_index + (len(meth_lines) - 1)
    return "".join(meth_lines), (startline_index + 1), (last_endline_index + 1), last_endline_index


if __name__ == "__main__":
    target_file = "./Main.java" # Add the path to your file
    with open(target_file, 'r') as r:
        codelines = r.readlines()
    code_text = ''.join(codelines)
    lex = None
    tree = javalang.parse.parse(code_text)
    methods = {}
    for _, method_node in tree.filter(javalang.tree.MethodDeclaration):
        startpos, endpos, startline, endline = get_method_start_end(method_node)
        method_text, startline, endline, lex = get_method_text(startpos, endpos, startline, endline, lex)
        methods[method_node.name] = method_text
        print("--- ### ---")
        print(method_text)
        print("Line range: ", startline, " - ", endline)
        print("--- ### ---")
It worked. Thanks. My question is: if I pass a method name and want to extract only that method's code, how do I call it?
Here's a version of @anjandash 's code that should support inner class.
import javalang


def get_method_start_end(method_node):
    """Find where ``method_node`` starts and where the code after it begins.

    Walks the module-level ``tree``. Returns ``(startpos, endpos, startline,
    endline)``; values that could not be determined are None, and ``endpos``
    is None when the method runs to the end of the file.
    """
    startpos = endpos = None
    for path, node in tree:
        if startpos is None:
            if node == method_node:
                startpos = node.position
            continue
        # Skip nodes without a position: using one as the end marker would
        # make the caller slice to the end of the file.
        if method_node not in path and node.position is not None:
            endpos = node.position
            break

    startline = startpos.line if startpos is not None else None
    endline = endpos.line if endpos is not None else None
    return startpos, endpos, startline, endline


def get_method_text(startpos, endpos, startline, endline, last_endline_index):
    """Extract one method's source text from the module-level ``codelines``.

    ``startline``/``endline`` are 1-based; ``last_endline_index`` is the
    0-based last line of the previously extracted method (None on the first
    call). Returns ``(text, first_line, last_line, last_line_index)``, or
    ``("", None, None, None)`` when ``startpos`` is None.
    """
    if startpos is None:
        return "", None, None, None

    def clip(text):
        # Cut after the last "}"; body-less methods fall back to their ";".
        cut = text.rfind("}")
        if cut == -1:
            cut = text.rfind(";")
        return text[:cut + 1]

    startline_index = startline - 1
    endline_index = endline - 1 if endpos is not None else None

    # 1. check for and fetch annotations: only lines starting with "@" that
    #    sit directly above the method (Javadoc "@param" lines do not count).
    floor = 0 if last_endline_index is None else last_endline_index + 1
    while (startline_index > floor
           and codelines[startline_index - 1].lstrip().startswith("@")):
        startline_index -= 1

    # NUL marks the line joints so the line count survives the trimming.
    line_sep = "\x00"
    meth_text = clip(line_sep.join(codelines[startline_index:endline_index]))

    # 2. remove trailing rbrace for last methods & any external
    #    content/comments; only a surplus of "}" is trimmed.
    surplus = meth_text.count("}") - meth_text.count("{")
    for _ in range(max(surplus, 0)):
        meth_text = clip(meth_text[:meth_text.rfind("}")])

    meth_lines = meth_text.split(line_sep)
    last_endline_index = startline_index + (len(meth_lines) - 1)
    return "".join(meth_lines), (startline_index + 1), (last_endline_index + 1), last_endline_index


if __name__ == "__main__":
    target_file = "./Main.java" # Add the path to your file
    with open(target_file, 'r') as r:
        codelines = r.readlines()
    code_text = ''.join(codelines)
    lex = None
    tree = javalang.parse.parse(code_text)
    methods = {}
    for _, method_node in tree.filter(javalang.tree.MethodDeclaration):
        startpos, endpos, startline, endline = get_method_start_end(method_node)
        method_text, startline, endline, lex = get_method_text(startpos, endpos, startline, endline, lex)
        methods[method_node.name] = method_text
        print("--- ### ---")
        print(method_text)
        print("Line range: ", startline, " - ", endline)
        print("--- ### ---")

# It worked. Thanks. My query is if I pass the method name and want to extract that specific method code, how to call it.
It should be easy to extract the specific method body by name if you have all the method bodies extracted.
To save time, simply add an 'if' condition that filters by method name in the for loop in the main function.