DeepSeek-Coder
DeepSeek-Coder copied to clipboard
Regex of HASDEPENDENCY in Dependency Parsing
Could you share more details on the regular expressions used in dependency parsing for each language?
We design different regex patterns according to the grammar characteristics of different languages.
For example, for the C language, we use #include to find dependencies:
def extract_includes(file_content):
    """Return the set of paths named by ``#include`` directives in C source.

    Both the quoted form (``#include "path"``) and the angle-bracket form
    (``#include <path>``) are recognised.

    Args:
        file_content: Text of a C source or header file.

    Returns:
        A set of the include paths exactly as written between the
        delimiters (for example ``"sys/types.h"``). Empty when no directive
        is found.
    """
    # Whitespace is optional both after '#' and before the delimiter:
    # '#include"a.h"', '#include<a.h>' and '#  include <a.h>' are all valid
    # preprocessor directives and must not be skipped.
    include_pattern = r'#[ \t]*include[ \t]*(?:"([^"]+)"|<([^>]+)>)'
    return {
        # Exactly one of the two groups participates in any given match.
        match.group(1) or match.group(2)
        for match in re.finditer(include_pattern, file_content)
    }
def find_dependencies(file_paths, contents):
    """Find include relationships between the C files in ``file_paths``.

    Only paths ending in ``.c`` or ``.h`` are considered. File A is taken
    to depend on file B when one of A's include paths is B's base name or
    ends with ``"/"`` followed by B's base name.

    Args:
        file_paths: Iterable of file paths.
        contents: Mapping from file path to that file's text; it is indexed
            with every considered path.

    Returns:
        A list of ``(included_path, including_path)`` tuples, ordered by
        including file and then by included file, both in ``file_paths``
        order.
    """
    c_suffixes = (".c", ".h")
    candidates = [path for path in file_paths if path.endswith(c_suffixes)]

    edges = []
    for includer in candidates:
        text = contents[includer]
        # Files that are empty or whitespace-only cannot include anything.
        if not text.strip():
            continue

        included = extract_includes(text)
        for target in candidates:
            if target == includer:
                continue
            base_name = os.path.basename(target)
            path_tail = "/" + base_name
            # One edge per (target, includer) pair, however many of the
            # include paths happen to match.
            if any(inc == base_name or inc.endswith(path_tail) for inc in included):
                edges.append((target, includer))
    return edges