perf(builtin): improve help_tags picker performance
Description
Change how help_tags parses tag files. Specifically, don't split the file by lines and later by tabs, but iterate over the original text and keep track of where the next '\t' and '\n' are located.
Results (time in ms, ~10'000 tags):
| | before | after |
|---|---|---|
| laptop | 60-90 | 20-30 |
| PC | 25-35 | 5-15 |
Type of change
(Not from the list) Performance improvement
How Has This Been Tested?
I am not sure if there is a repeatable way to test that this change does not affect current behavior. I wrote a test file that extracts the tags both ways and compares the results:
verify.lua
local vim = vim
local uv = vim.loop
--- Read a whole file into a string using libuv's synchronous fs calls.
-- Adapted (per the original comment) from plenary.nvim path.lua `Path:_read()`.
-- Original author's note, truncated in the source: "for some reason test
-- won't pass with absolute".
-- @param filename string: path handed to `uv.fs_open`
-- @return string: `stat.size` bytes read from offset 0
-- Raises (via `assert`) if open, fstat, read or close fails.
local function read(filename)
  -- 438 decimal == 0666 octal (mode is only relevant when a file is created).
  local fd = assert(uv.fs_open(filename, "r", 438))

  -- Collect failures instead of asserting right away, so the descriptor is
  -- always closed even when fstat/read fail (the original leaked it).
  local stat, stat_err = uv.fs_fstat(fd)
  local data, read_err
  if stat then
    data, read_err = uv.fs_read(fd, stat.size, 0)
  end
  local closed, close_err = uv.fs_close(fd)

  assert(stat, stat_err)
  assert(data, read_err)
  assert(closed, close_err)
  return data
end
local utils = {}

--- Return the part of `path` after its last "/" separator.
-- If `path` has no separator it is returned unchanged; a path that ends
-- with the separator yields the empty string.
utils.path_tail = (function()
  local os_sep = "/"
  local sep_byte = os_sep:byte()

  return function(path)
    -- Walk backwards so the first hit is the last separator.
    local idx = #path
    while idx >= 1 do
      if path:byte(idx) == sep_byte then
        return path:sub(idx + 1)
      end
      idx = idx - 1
    end
    return path
  end
end)()
local help_tag_delimiter = "\t"

--- Examine the single tags-file line that starts at `cur_pos`.
-- The text is never split; the function only tracks where the next "\n" and
-- the next tab are, and cuts substrings for lines that turn out to be tags.
--
-- A line yields a tag only if it has exactly three tab-separated fields,
-- its name is not already in `tags_map`, its name does not start with
-- "!_TAG_", and it is not the `help-tags<TAB>tags<TAB>...` entry.
--
-- @param text string: full contents of a tags file
-- @param cur_pos number: index of the first byte of the current line
-- @param next_tab number|nil: index of the first tab at or after `cur_pos`,
--   or nil when the rest of the text has no tab
-- @param tags_map table: set of tag names already collected (only read here)
-- @return number|nil: index of the "\n" ending this line, nil on the last line
-- @return number|nil: index of the first tab after this line (nil if none);
--   pass it back as `next_tab` on the following call
-- @return string|nil: tag name   (only for accepted lines)
-- @return string|nil: tag file   (only for accepted lines)
-- @return string|nil: tag command (only for accepted lines)
local function help_tag_advance(text, cur_pos, next_tab, tags_map)
  local next_line_raw = string.find(text, "\n", cur_pos, true)
  -- Treat end-of-text as a virtual newline so the last line is handled too.
  local next_line = next_line_raw or (#text + 1)

  -- First tab ends the name; without it the line has a single field.
  local name_end = next_tab
  if not name_end or name_end > next_line then
    return next_line_raw, next_tab
  end

  -- Second tab ends the file name; without it the line has two fields.
  next_tab = string.find(text, help_tag_delimiter, next_tab + 1, true)
  local tag_file_end = next_tab
  if not tag_file_end or tag_file_end > next_line then
    return next_line_raw, next_tab
  end

  next_tab = string.find(text, help_tag_delimiter, next_tab + 1, true)
  if next_tab and next_tab < next_line then -- line must not contain more than 2 tabs
    -- Skip the remaining tabs of this line so the caller's cursor stays valid.
    while next_tab and next_tab < next_line do
      next_tab = string.find(text, help_tag_delimiter, next_tab + 1, true)
    end
    return next_line_raw, next_tab
  end

  local name = text:sub(cur_pos, name_end - 1)
  if tags_map[name] or name:sub(1, 6) == "!_TAG_" then
    return next_line_raw, next_tab
  end

  local tag_file = text:sub(name_end + 1, tag_file_end - 1)
  -- Only the combined `help-tags` + `tags` entry is dropped, matching the
  -- split-based implementation; either value on its own is a regular tag.
  if name == "help-tags" and tag_file == "tags" then
    return next_line_raw, next_tab
  end

  local cmd = text:sub(tag_file_end + 1, next_line - 1)
  return next_line_raw, next_tab, name, tag_file, cmd
end
-- Options for the run: default the language list to 'helplang' and enable
-- the English fallback unless a value was already set.
local opts = { file_ignore_patterns = {} }
if opts.lang == nil then
  opts.lang = vim.o.helplang
end
if opts.fallback == nil then
  opts.fallback = true
end

-- Ordered list of requested languages, with "en" appended as a fallback.
local langs = vim.split(opts.lang, ",", true)
local needs_english = opts.fallback and not vim.tbl_contains(langs, "en")
if needs_english then
  langs[#langs + 1] = "en"
end

-- Set form of `langs` for membership checks.
local langs_map = {}
for idx = 1, #langs do
  langs_map[langs[idx]] = true
end
-- Maps a language code to the list of tags files found for it.
local tag_files = {}

--- Record `file` under `lang`, ignoring languages that were not requested.
-- @param lang string: language code looked up in `langs_map`
-- @param file string: path of the tags file
local function add_tag_file(lang, file)
  if not langs_map[lang] then
    return
  end

  local bucket = tag_files[lang]
  if not bucket then
    tag_files[lang] = { file }
    return
  end
  table.insert(bucket, file)
end
-- Maps a doc file's tail name to its full path (tags files excluded).
local help_files = {}
local all_files = vim.api.nvim_get_runtime_file("doc/*", true)

-- Sort every runtime doc entry into either a tags file or a help file.
for idx = 1, #all_files do
  local fullpath = all_files[idx]
  local file = utils.path_tail(fullpath)

  local is_english_tags = file == "tags"
  local is_localized_tags = not is_english_tags and file:match "^tags%-..$" ~= nil

  if is_english_tags then
    add_tag_file("en", fullpath)
  elseif is_localized_tags then
    -- The two characters after "tags-" are the language code.
    add_tag_file(file:sub(-2), fullpath)
  else
    help_files[file] = fullpath
  end
end
--- Baseline tag extraction: split each tags file into lines, then each line
--- into tab-separated fields. Kept unchanged so that its output and timing
--- can be compared against `result_new`.
-- Reads the upvalues `langs`, `tag_files` and `help_files`.
-- @return table: list of `{ name, filename, cmd, lang }` entries in the order
--   they were encountered; `filename` is nil when the tag's file is not a key
--   of `help_files`.
local function result_old()
local delimiter = "\t"
local tags = {}
-- Names already emitted; the first occurrence of a name wins.
local tags_map = {}
for _, lang in ipairs(langs) do
for _, file in ipairs(tag_files[lang] or {}) do
local lines = vim.split(read(file), "\n", true)
for _, line in ipairs(lines) do
-- TODO: also ignore tagComment starting with ';'
if not line:match "^!_TAG_" then
local fields = vim.split(line, delimiter, true)
-- Only lines with exactly three fields and an unseen name are considered.
if #fields == 3 and not tags_map[fields[1]] then
-- Skips only the entry whose name is "help-tags" AND whose file is "tags".
if fields[1] ~= "help-tags" or fields[2] ~= "tags" then
table.insert(tags, {
name = fields[1],
filename = help_files[fields[2]],
cmd = fields[3],
lang = lang,
})
tags_map[fields[1]] = true
end
end
end
end
end
end
return tags
end
--- New tag extraction: walk each tags file in place with `help_tag_advance`
--- instead of splitting it into lines and fields.
-- Reads the upvalues `langs`, `tag_files` and `help_files`.
-- @return table: list of `{ name, filename, cmd, lang }` entries in the order
--   they were encountered.
local function result_new()
  local found = {}
  local seen = {}

  for _, lang in ipairs(langs) do
    local files = tag_files[lang] or {}
    for _, file in ipairs(files) do
      local text = read(file)
      local line_start = 1
      local tab_pos = string.find(text, help_tag_delimiter, line_start, true)
      local newline_pos

      repeat
        local name, tag_file, cmd
        newline_pos, tab_pos, name, tag_file, cmd = help_tag_advance(text, line_start, tab_pos, seen)

        if name then
          found[#found + 1] = {
            name = name,
            filename = help_files[tag_file],
            cmd = cmd,
            lang = lang,
          }
          seen[name] = true
        end

        -- A nil newline means the line just handled was the last one.
        if newline_pos then
          line_start = newline_pos + 1
        end
      until not newline_pos
    end
  end

  return found
end
-- Time the baseline implementation.
local s1 = vim.loop.hrtime()
local r1 = result_old()
local e1 = vim.loop.hrtime()

-- Time the new implementation.
local s2 = vim.loop.hrtime()
local r2 = result_new()
local e2 = vim.loop.hrtime()

-- Elapsed time scaled by 1e-6, followed by the number of tags found.
print("Old result:", (e1 - s1) * 0.000001, #r1)
print("New result:", (e2 - s2) * 0.000001, #r2)

if #r1 ~= #r2 then
  print("Number of tags doesn't match!")
  return
end

-- Compare entry by entry through their `vim.inspect` rendering and stop at
-- the first difference.
for i = 1, #r1 do
  local old_repr = vim.inspect(r1[i])
  local new_repr = vim.inspect(r2[i])
  if old_repr ~= new_repr then
    print("Tag", i, "doesn't match", old_repr, new_repr)
    return
  end
end
nvim --clean verify.lua
:source
Configuration:
-
Neovim version (nvim --version): NVIM v0.9.4 Build type: RelWithDebInfo LuaJIT 2.1.1692716794
-
Operating system and version: Ubuntu 22.04.3 LTS
Checklist:
- [x] My code follows the style guidelines of this project (stylua)
- [x] I have performed a self-review of my own code
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] I have made corresponding changes to the documentation (lua annotations)
The second commit removes the duplicate checking. These checks prevent scenarios where the user wants to open help for one tag, but the help is opened for a duplicate tag instead.
In my 12000 tags (with plugins) the only duplicate tag is S (from nvim-surround). And there are currently a few other tags with the same behavior (but for a different reason): https://github.com/nvim-telescope/telescope.nvim/issues/3027#issuecomment-2081890699.
Showing duplicate tags might also be useful in and of itself, since you can easily read the information in the preview; otherwise, opening a duplicate tag would involve knowing that the duplicate exists and running :tjump tag inside the doc buffer.
Without this check, the time to extract the tags decreases by 5ms on the laptop and by 1 on PC.
I updated the logic to remove '\r' from the end of lines (on all platforms). However, when I checked earlier whether this was actually an issue, it did not seem to be, so I may be missing something.