Description

Change how help_tags parses tag files. Specifically, don't split the file by lines and later by tabs, but iterate over the original text and keep track of where the next '\t' and '\n' are located.

Results (time in ms, ~10'000 tags):

	before	after
laptop	60-90	20-30
PC	25-35	5-15

Type of change

(Not from the list) Performance improvement

How Has This Been Tested?

I am not sure if there is a repeatable way to test that this change does not affect current behavior. I wrote a test file that extracts the tags both ways and compares the results:

verify.lua

local vim = vim
local uv = vim.loop

--- Read the entire contents of a file using libuv's synchronous fs calls.
--- Adapted from plenary.nvim's path.lua (Path:_read()).
---
--- The descriptor is closed on every path, including when fstat/read fail,
--- so a failing read does not leak the handle.
---@param filename string path passed to uv.fs_open
---@return string data everything read from offset 0 up to the size reported by fstat
local function read(filename)
  -- 438 is 0666 in octal.
  -- NOTE (original author): "for some reason test won't pass with absolute".
  local fd = assert(uv.fs_open(filename, "r", 438))

  -- Run the fallible steps under pcall so the descriptor can be released
  -- before any error is propagated to the caller.
  local ok, result = pcall(function()
    local stat = assert(uv.fs_fstat(fd))
    return assert(uv.fs_read(fd, stat.size, 0))
  end)

  local closed, close_err = uv.fs_close(fd)
  if not ok then
    -- Level 0 keeps the original message untouched (no position prefix).
    error(result, 0)
  end
  assert(closed, close_err)
  return result
end

local utils = {}

--- Return the last component of a path, i.e. everything after the final "/".
--- A path without any "/" is returned unchanged; a path ending in "/" yields "".
---@param path string
---@return string
function utils.path_tail(path)
  local sep_byte = string.byte "/"
  local idx = #path
  -- Walk backwards so the first separator found is the last one in the path.
  while idx >= 1 do
    if path:byte(idx) == sep_byte then
      return path:sub(idx + 1)
    end
    idx = idx - 1
  end
  return path
end


local help_tag_delimiter = "\t"

--- Parse the tags-file line starting at `cur_pos` without splitting `text`.
---
--- A line yields a tag only when it contains exactly two tabs
--- (name <TAB> file <TAB> cmd). Lines are skipped (no name returned) when:
---   * they have fewer or more than two tabs,
---   * the name is already present in `tags_map`,
---   * the name starts with "!_TAG_",
---   * the name is "help-tags" AND the file is "tags" (same rule as the
---     split-based reference parser, which only drops that exact pair).
---
--- The function never writes to `tags_map`; recording accepted names is left
--- to the caller.
---
---@param text string full contents of the tags file
---@param cur_pos integer index of the first character of the current line
---@param next_tab integer|nil index of the first tab at or after `cur_pos`, nil if none remains
---@param tags_map table names already collected, looked up by tag name
---@return integer|nil next_line_raw index of the "\n" ending this line, nil when the line is the last one
---@return integer|nil next_tab index of the first tab beyond this line, nil if none remains
---@return string|nil name tag name (only for accepted lines)
---@return string|nil tag_file help file the tag points into (only for accepted lines)
---@return string|nil cmd search command, i.e. the rest of the line (only for accepted lines)
local function help_tag_advance(text, cur_pos, next_tab, tags_map)
  local text_len = #text

  local next_line_raw = string.find(text, "\n", cur_pos, true)
  -- Treat end-of-text as a virtual newline so the last line parses like any other.
  local next_line = next_line_raw or text_len + 1

  -- First tab: must exist and belong to this line, otherwise the line has no fields.
  local name_end = next_tab
  if not name_end or name_end > next_line then
    return next_line_raw, next_tab
  end

  -- Second tab: separates the file from the command.
  next_tab = string.find(text, help_tag_delimiter, next_tab + 1, true)
  local tag_file_end = next_tab
  if not tag_file_end or tag_file_end > next_line then
    return next_line_raw, next_tab
  end

  next_tab = string.find(text, help_tag_delimiter, next_tab + 1, true)
  if next_tab and next_tab < next_line then -- line must not contain more than 2 tabs
    -- Consume the remaining tabs of this line so the caller receives a tab
    -- position that belongs to a later line.
    while next_tab and next_tab < next_line do
      next_tab = string.find(text, help_tag_delimiter, next_tab + 1, true)
    end
    return next_line_raw, next_tab
  end

  local name = text:sub(cur_pos, name_end - 1)
  if tags_map[name] or name:sub(1, 6) == "!_TAG_" then
    return next_line_raw, next_tab
  end

  local tag_file = text:sub(name_end + 1, tag_file_end - 1)
  -- Only the self-referencing "help-tags -> tags" entry is dropped; a
  -- "help-tags" tag in another file, or another tag in "tags", is kept.
  if name == "help-tags" and tag_file == "tags" then
    return next_line_raw, next_tab
  end

  local cmd = text:sub(tag_file_end + 1, next_line - 1)

  return next_line_raw, next_tab, name, tag_file, cmd
end


-- Picker options, filled with the same defaults the help_tags picker applies.
local opts = {}

if opts.lang == nil then
  opts.lang = vim.o.helplang
end
if opts.fallback == nil then
  opts.fallback = true
end
opts.file_ignore_patterns = {}

-- Languages to collect tags for, in priority order; "en" is appended as a
-- fallback unless it is already listed or the fallback is disabled.
local langs = vim.split(opts.lang, ",", true)
local needs_english = opts.fallback and not vim.tbl_contains(langs, "en")
if needs_english then
  langs[#langs + 1] = "en"
end

-- Set of requested languages for constant-time membership checks.
local langs_map = {}
for i = 1, #langs do
  langs_map[langs[i]] = true
end

-- lang -> list of tags files found on the runtimepath for that language.
local tag_files = {}

--- Record `file` as a tags file for `lang`; languages that were not
--- requested are ignored.
local function add_tag_file(lang, file)
  if not langs_map[lang] then
    return
  end
  local bucket = tag_files[lang]
  if bucket then
    bucket[#bucket + 1] = file
  else
    tag_files[lang] = { file }
  end
end

-- File name -> full path for every doc/ entry that is not a tags file.
local help_files = {}
local all_files = vim.api.nvim_get_runtime_file("doc/*", true)
for _, fullpath in ipairs(all_files) do
  local file = utils.path_tail(fullpath)
  local is_english_tags = file == "tags"
  local is_localized_tags = not is_english_tags and file:match "^tags%-..$" ~= nil
  if is_english_tags then
    add_tag_file("en", fullpath)
  elseif is_localized_tags then
    -- "tags-xx": the two-letter suffix is the language code.
    add_tag_file(file:sub(-2), fullpath)
  else
    help_files[file] = fullpath
  end
end


--- Reference implementation: collect help tags by splitting every tags file
--- into lines and every line into tab-separated fields.
--- Kept as the baseline that result_new() is timed and compared against.
---@return table[] tags list of { name, filename, cmd, lang } entries, in file order
local function result_old()
  local delimiter = "\t"

  local tags = {}
  -- Names already emitted; the first occurrence of a name wins.
  local tags_map = {}
  for _, lang in ipairs(langs) do
    for _, file in ipairs(tag_files[lang] or {}) do
      local lines = vim.split(read(file), "\n", true)
      for _, line in ipairs(lines) do
        -- TODO: also ignore tagComment starting with ';'
        if not line:match "^!_TAG_" then
          local fields = vim.split(line, delimiter, true)
          -- A valid entry has exactly three fields: name, file, cmd.
          if #fields == 3 and not tags_map[fields[1]] then
            -- Drop only the entry whose name is "help-tags" and whose file is "tags".
            if fields[1] ~= "help-tags" or fields[2] ~= "tags" then
              table.insert(tags, {
                name = fields[1],
                -- nil when the referenced file was not found under doc/.
                filename = help_files[fields[2]],
                cmd = fields[3],
                lang = lang,
              })
              tags_map[fields[1]] = true
            end
          end
        end
      end
    end
  end
  return tags
end

--- Collect help tags by scanning each tags file in place: help_tag_advance()
--- is called once per line and reports where the next newline and tab are.
---@return table[] tags list of { name, filename, cmd, lang } entries, in file order
local function result_new()
  local collected = {}
  -- Names already emitted; handed to help_tag_advance() to reject duplicates.
  local seen = {}

  for _, lang in ipairs(langs) do
    local files = tag_files[lang] or {}
    for _, path in ipairs(files) do
      local contents = read(path)

      local line_start = 1
      local tab_pos = string.find(contents, help_tag_delimiter, line_start, true)
      local newline_pos

      repeat
        local name, tag_file, cmd
        newline_pos, tab_pos, name, tag_file, cmd = help_tag_advance(contents, line_start, tab_pos, seen)

        if name then
          collected[#collected + 1] = {
            name = name,
            filename = help_files[tag_file],
            cmd = cmd,
            lang = lang,
          }
          seen[name] = true
        end

        -- The next line begins right after the newline that ended this one.
        if newline_pos then
          line_start = newline_pos + 1
        end
      until not newline_pos
    end
  end

  return collected
end


-- Time both implementations (hrtime is in nanoseconds, printed as milliseconds)
-- and then check that they produced the same tags in the same order.
local NS_TO_MS = 0.000001

local old_begin = uv.hrtime()
local old_tags = result_old()
local old_finish = uv.hrtime()

local new_begin = uv.hrtime()
local new_tags = result_new()
local new_finish = uv.hrtime()

print("Old result:", (old_finish - old_begin) * NS_TO_MS, #old_tags)
print("New result:", (new_finish - new_begin) * NS_TO_MS, #new_tags)

if #old_tags ~= #new_tags then
  print("Number of tags doesn't match!")
  return
end

-- Entries are compared through their vim.inspect() dumps; the first
-- difference is reported and ends the script.
for i = 1, #old_tags do
  local old_dump = vim.inspect(old_tags[i])
  local new_dump = vim.inspect(new_tags[i])
  if old_dump ~= new_dump then
    print("Tag", i, "doesn't match", old_dump, new_dump)
    return
  end
end

nvim --clean verify.lua
:source

Configuration:

Neovim version (nvim --version): NVIM v0.9.4 Build type: RelWithDebInfo LuaJIT 2.1.1692716794
Operating system and version: Ubuntu 22.04.3 LTS

Checklist:

[x] My code follows the style guidelines of this project (stylua)
[x] I have performed a self-review of my own code
[x] I have commented my code, particularly in hard-to-understand areas
[x] I have made corresponding changes to the documentation (lua annotations)

May 04 '24 18:05 vanaigr

The second commit removes the duplicate checking. These checks prevent scenarios where the user wants to open help for one tag, but the help is opened for a duplicate tag instead.

In my 12000 tags (with plugins), the only duplicate tag is S (from nvim-surround). There are currently a few other tags with the same behavior (but for a different reason): https://github.com/nvim-telescope/telescope.nvim/issues/3027#issuecomment-2081890699. Showing duplicate tags might also be useful in and of itself, since you can easily read the information in the preview; otherwise, opening a duplicate tag would involve knowing that the duplicate exists and running :tjump tag inside the doc buffer.

Without this check, the time to extract the tags decreases by 5 ms on the laptop and by 1 ms on the PC.

May 04 '24 18:05 vanaigr

I updated the logic to remove '\r' from the end of lines (on all platforms). However, I checked whether this was an issue before the change and it seems like it wasn't, so maybe I am missing something.

Sep 10 '24 23:09 vanaigr

perf(builtin): improve help_tags picker performance

Description

Type of change

How Has This Been Tested?

Checklist: