cmp-ai
ollama completion does not work at all
When I trigger manual completion (C-x), it just spams "Completion started" until I have to close nvim.
my config:
{
'maxwell-bland/cmp-ai',
config = function()
local cmp_ai = require('cmp_ai.config')
cmp_ai:setup({
max_lines = 100,
provider = 'Ollama',
provider_options = {
stream = true,
model = 'mistral',
},
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = false,
})
end,
},
Did you install Ollama?
Yes, it works fine with the chatgpt plugin for chatting, and I can use it from the terminal. I have tried several models so far and none of them worked.
try setting model = codellama
other than that, it works on my end...
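For clarity, that just means changing the model name in provider_options; a minimal sketch based on the config above (everything else unchanged, and the tag has to exist in ollama list):
local cmp_ai = require('cmp_ai.config')
cmp_ai:setup({
  max_lines = 100,
  provider = 'Ollama',
  provider_options = {
    model = 'codellama', -- was 'mistral'
  },
  notify = true,
  notify_callback = function(msg)
    vim.notify(msg)
  end,
  run_on_every_keystroke = false,
})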
I was having some issues at first as well; however, I think I wasn't waiting long enough, and eventually the completion does show up (although sometimes it doesn't when I'm low on memory).
@OneOfOne if it helps, here is the configuration that worked for me; hitting C-x generates the completion (without streaming, so give it 5-60s).
Same issue: "Completion started", but no VRAM usage according to gpustat -cp --watch.
Ollama works fine locally, and works with the Gen plugin.
Must be a config issue. I'll try to reproduce it with a minimal config.
Edit: it was user error. I feel very stupid, but I'm going to leave stuff here to maybe help someone in the future.
I mistakenly thought codellama:latest was unnecessary because it has the same id as codellama:instruct, and so I removed it.
I was trying to use codellama:code as the model, and that was the issue. I redownloaded codellama, which pulled the cached codellama:latest, and switched to codellama as the model, as tzachar suggested above.
Before going overboard and creating a minimal config like I did, make sure to do a very quick test using curl to debug.
curl http://localhost:11434/api/generate -d '{
"model": "codellama",
"prompt": "Why is the sky blue?"
}'
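If the model tag doesn't exist locally (as with my codellama:code), the API should answer with an error instead of a completion, which is much faster to spot than a silent failure inside nvim. The available tags can be checked and fixed with the standard Ollama CLI (shown here only as a reminder):
ollama list            # lists the tags actually installed, e.g. codellama:latest
ollama pull codellama  # pulls codellama:latest if it is missing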
and here is the not-so-minimal config with lazy, launched with nvim -u lazy_min.lua main.py:
-- Set leader key
vim.g.mapleader = " "
-- Install lazy.nvim if not already installed
local lazypath = vim.fn.stdpath("data") .. "/lazy/lazy.nvim"
if not vim.loop.fs_stat(lazypath) then
vim.fn.system({
"git",
"clone",
"--filter=blob:none",
"https://github.com/folke/lazy.nvim.git",
"--branch=stable", -- latest stable release
lazypath,
})
end
vim.opt.rtp:prepend(lazypath)
vim.lsp.set_log_level("debug")
vim.g.cmp_ai_debug = true
-- Install plugins
require("lazy").setup({
-- nvim-cmp and dependencies
{ "neovim/nvim-lspconfig" },
{ "hrsh7th/nvim-cmp" },
{ "hrsh7th/cmp-nvim-lsp" },
{ "tzachar/cmp-ai", dependencies = { "nvim-lua/plenary.nvim" } },
{ "L3MON4D3/LuaSnip" },
{ "saadparwaiz1/cmp_luasnip" },
{ "onsails/lspkind.nvim" },
{
"folke/noice.nvim",
event = "VeryLazy",
opts = {
lsp = {
override = {
["vim.lsp.util.convert_input_to_markdown_lines"] = true,
["vim.lsp.util.stylize_markdown"] = true,
["cmp.entry.get_documentation"] = true, -- requires hrsh7th/nvim-cmp
},
},
presets = {
bottom_search = true, -- use a classic bottom cmdline for search
command_palette = true, -- position the cmdline and popupmenu together
long_message_to_split = true, -- long messages will be sent to a split
inc_rename = false, -- enables an input dialog for inc-rename.nvim
lsp_doc_border = false, -- add a border to hover docs and signature help
},
},
dependencies = {
"MunifTanjim/nui.nvim",
"rcarriga/nvim-notify",
},
},
})
-- Basic Neovim options
vim.opt.completeopt = { "menu", "menuone", "noselect" }
-- Setup nvim-cmp
local cmp = require("cmp")
cmp.setup({
debug = true,
snippet = {
expand = function(args)
require("luasnip").lsp_expand(args.body)
end,
},
mapping = cmp.mapping.preset.insert({
["<C-b>"] = cmp.mapping.scroll_docs(-4),
["<C-f>"] = cmp.mapping.scroll_docs(4),
["<C-Space>"] = cmp.mapping.complete(),
["<C-e>"] = cmp.mapping.abort(),
["<CR>"] = cmp.mapping.confirm({ select = true }),
["<C-x>"] = cmp.mapping(
cmp.mapping.complete({
config = {
sources = cmp.config.sources({
{ name = "cmp_ai" },
}),
},
}),
{ "i" }
),
}),
sources = cmp.config.sources({
{ name = "cmp_ai" },
{ name = "nvim_lsp" },
{ name = "luasnip" },
}),
formatting = {
format = function(entry, vim_item)
local source_names = {
nvim_lsp = "[LSP]",
cmp_ai = "[AI]",
luasnip = "[Snippet]",
}
vim_item.menu = source_names[entry.source.name]
return vim_item
end,
},
})
-- Setup lspconfig
local capabilities = require("cmp_nvim_lsp").default_capabilities()
local lspconfig = require("lspconfig")
lspconfig.pyright.setup({
capabilities = capabilities,
})
local cmp_ai = require("cmp_ai.config")
cmp_ai:setup({
max_lines = 100,
provider = "Ollama",
provider_options = {
model = "codellama",
},
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = true,
ignored_file_types = {
},
})
local lspkind = require("lspkind")
local source_mapping = {
buffer = "[Buffer]",
nvim_lsp = "[LSP]",
nvim_lua = "[Lua]",
cmp_ai = "[AI]",
path = "[Path]",
}
require("cmp").setup({
sources = {
{ name = "nvim_lsp" },
{ name = "luasnip" },
{ name = "buffer" },
{ name = "cmp_ai" },
},
formatting = {
format = function(entry, vim_item)
-- if you have lspkind installed, you can use it like
-- in the following line:
vim_item.kind = lspkind.symbolic(vim_item.kind, { mode = "symbol" })
vim_item.menu = source_mapping[entry.source.name]
if entry.source.name == "cmp_ai" then
local detail = (entry.completion_item.labelDetails or {}).detail
vim_item.kind = ""
if detail and detail:find(".*%%.*") then
vim_item.kind = vim_item.kind .. " " .. detail
end
if (entry.completion_item.data or {}).multiline then
vim_item.kind = vim_item.kind .. " " .. "[ML]"
end
end
local maxwidth = 80
vim_item.abbr = string.sub(vim_item.abbr, 1, maxwidth)
return vim_item
end,
},
})
local compare = require("cmp.config.compare")
cmp.setup({
sorting = {
priority_weight = 2,
comparators = {
require("cmp_ai.compare"),
compare.offset,
compare.exact,
compare.score,
compare.recently_used,
compare.kind,
compare.sort_text,
compare.length,
compare.order,
},
},
})
I also tried to set it up, and it doesn't work for me. With Wireshark, I looked at the requests and responses going to the codellama model, and there is a problem either with the prompt or with the prompt + response. The entire file is sent as the prompt, and the response is not a code completion but an explanation of the code.
Here are examples
request
{
"options": {"temperature": 0.2},
"model": "codellama",
"stream": false,
"prompt": "<PRE> from django import forms\nfrom django.core.validators import RegexValidator\n\n\nphone_number_validator = RegexValidator...(removed for readability)...\n\nclass ContactForm(forms.Form):\n name = forms.CharField(max_length=255)\n email = forms.EmailField()\n message = forms.CharField(widget=forms.Textarea(), required=True)\n\nclass ContactForm(f <SUF>) <MID>"
}
response
{
"model":"codellama",
"created_at":"2024-07-14T13:07:24.099798Z",
"response":" ```\nfrom django import forms\nfrom django.core.validators import RegexValidator...(removed for readability)...\n\nclass ContactForm(forms.Form):\n name = forms.CharField(max_length=255)\n email = forms.EmailField()\n message = forms.CharField(widget=forms.Textarea(), required=True)\n ```\n \n This is a Django form that includes three fields: `title`, `about`, and `url`. The `phone` field has a validator to check if the input is a correct phone number, and the `barcode` field has a validator to check if the input is a correct barcode. The `ReviewForm` class includes only one field, which is a textarea for the review body. The `ContactForm` class includes three fields: `name`, `email`, and `message`.\n\nIt's important to note that in order to use this form in a Django view, you need to import it and create an instance of it. For example:\n```\nfrom .forms import NodeCreateForm\n\ndef my_view(request):\n form = NodeCreateForm()\n return render(request, 'my_template.html', {'form': form})\n```\nIn this example, the `NodeCreateForm` class is imported and an instance of it is created and passed to the template as a context variable called `form`. The template can then use this variable to display the form and handle the user input.",
"done":true,"done_reason":"stop",
"context":[...],
"total_duration":15330468416,
"load_duration":9605625,
"prompt_eval_count":420,
"prompt_eval_duration":108984000,
"eval_count":633,
"eval_duration":15163806000
}
I don't know exactly how it is supposed to work, but I would guess that it should reply with a code completion snippet rather than a description.
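For what it's worth, one way to check whether the model handles fill-in-the-middle at all, independently of the plugin, is to send a tiny prompt in the same <PRE>/<SUF>/<MID> format as the captured request above (just a sketch, the snippet is arbitrary):
curl http://localhost:11434/api/generate -d '{
"options": {"temperature": 0.2},
"model": "codellama",
"stream": false,
"prompt": "<PRE> def add(a, b):\n    return <SUF>\n <MID>"
}'
If even this returns prose instead of a short completion, the problem is probably the model/prompt combination rather than cmp-ai.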
Not sure what the issue is. Tabby seems to work fine.
anyone having luck with this?
👍
I just got qwen2.5-coder 7b working with cmp-ai and ollama so I thought I'd post some of my findings here:
- write your own prompt by passing provider_options.prompt (see the sketch after this list). Different models have different "special tokens" for delimiting the context and the "fill in the middle" location. For example, the qwen2.5-coder GitHub repo includes code snippets in its README that use these special tokens:
"<|fim_prefix|>"
"<|fim_middle|>"
"<|fim_suffix|>"
"<|fim_pad|>"
"<|repo_name|>"
"<|file_sep|>"
"<|im_start|>"
"<|im_end|>"
as opposed to the built-in ones in this plugin (<PRE>, <SUF>, <MID>). This is probably going to be model-specific but I think it's worth going into the readme because people use all kinds of different models with ollama.
if you want to include project/workspace information etc., this is also where you can do all that.
- base models are worth a shot. For qwen2.5-coder 7b, the instruct model kept overdoing things, such as repeating lines of code from the context and wrapping the response in markdown code block delimiters (```), while the responses from the base model tend to be cleaner and immediately usable (no excessive explanations, delimiters, etc.)
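As a minimal sketch (token names taken from the qwen2.5-coder README mentioned above, everything else as in a normal Ollama setup), the custom prompt could look roughly like this:
provider_options = {
  model = 'qwen2.5-coder:7b', -- tag depends on what you pulled, a base variant is worth trying
  prompt = function(lines_before, lines_after)
    -- assemble a fill-in-the-middle prompt with qwen2.5-coder's special tokens
    return '<|fim_prefix|>' .. lines_before .. '<|fim_suffix|>' .. lines_after .. '<|fim_middle|>'
  end,
},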
Do you mind showing the full config please?
If you can provide a writeup, including config examples, I'll add it to the README
I'll prepare a PR if that's okay.
@Davidyz Cool
I played around with this a bit and thought I'd share my findings. I thought having 24GB of VRAM on my GPU was going to help, but completions still took several seconds depending on the context, so I switched to qwen2.5-coder:7b, and while it technically "works", there are usability issues.
- There has to be a way to limit the response size, because most of the time it's just huge, like 80 lines. This can probably be tuned with the prompt, but there should be a sane default of 1-2 lines or 80 characters (see the curl sketch after this list for a possible Ollama-level knob).
- I wish there were a way to also tune the number of suggestions; maybe the first suggestion is not really what you were looking for.
- It would help a lot if there were a way to cancel the previous request, because sometimes it takes too much time. That would also help with running larger LLMs locally, since I wouldn't be wasting resources on queries that no longer matter.
- The real killer feature, whose absence makes this close to unusable for me, would be streaming results into cmp asynchronously, kind of like what copilot-cmp does. I haven't looked into how it does it, but LLM results take a while; with copilot-cmp, the results from all the other sources show up almost instantly, and when Copilot is done, its suggestions are added as well. That makes the wait much more bearable and avoids stalling all suggestions behind something like a fetching timeout in cmp.
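On the first point, I don't know whether cmp-ai can forward extra generation options, but at the Ollama level the num_predict option caps how many tokens a response may contain; a purely illustrative curl sketch:
curl http://localhost:11434/api/generate -d '{
"model": "qwen2.5-coder:7b-base-q4_K_M",
"stream": false,
"options": {"num_predict": 64},
"prompt": "<|fim_prefix|>def add(a, b):\n    return <|fim_suffix|>\n<|fim_middle|>"
}'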
Hopefully that helps
Here is my config for reference
{
'tzachar/cmp-ai',
dependencies = {
{ 'nvim-lua/plenary.nvim' },
},
config = function()
require('cmp_ai.config'):setup({
max_lines = 100,
provider = 'Ollama',
provider_options = {
model = 'qwen2.5-coder:7b-base-q4_K_M',
prompt = function(lines_before, lines_after)
return "<|fim_prefix|>" .. lines_before .. "<|fim_suffix|>" .. lines_after .. "<|fim_middle|>"
end,
auto_unload = false
},
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = true,
ignored_file_types = {
},
})
end,
},