cmp-ai
ollama completion does not work at all
When I trigger manual completion (C-x), it just spams "Completion started" until I have to close nvim.
my config:
{
'maxwell-bland/cmp-ai',
config = function()
local cmp_ai = require('cmp_ai.config')
cmp_ai:setup({
max_lines = 100,
provider = 'Ollama',
provider_options = {
stream = true,
model = 'mistral',
},
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = false,
})
end,
},
Did you install Ollama?
Yes, it works fine with the chatgpt plugin for chatting, and I can use it from the terminal. I have tried several models so far and none of them worked.
try setting model = codellama
other than that, it works on my end...
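For clarity, that just means changing the model name in provider_options; a minimal sketch based on the config above (everything else unchanged, and the tag has to exist in ollama list):
local cmp_ai = require('cmp_ai.config')
cmp_ai:setup({
  max_lines = 100,
  provider = 'Ollama',
  provider_options = {
    model = 'codellama', -- was 'mistral'
  },
  notify = true,
  notify_callback = function(msg)
    vim.notify(msg)
  end,
  run_on_every_keystroke = false,
})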
I was having some issues at first as well; however, I think I wasn't waiting long enough, and eventually the completion does show up (although sometimes it doesn't when I'm low on memory).
@OneOfOne if it helps, here is the configuration that worked for me; hitting C-x generates the completion (without streaming, so give it 5-60s).
Same issue: "Completion started", but no VRAM usage according to gpustat -cp --watch.
Ollama works fine locally, and works with the Gen plugin.
Must be a config issue. I'll try to reproduce it with a minimal config.
Edit: it was user error. I feel very stupid, but I'm going to leave stuff here to maybe help someone in the future.
I mistakenly thought codellama:latest was unnecessary because it has the same id as codellama:instruct, and so I removed it.
I was trying to use codellama:code as the model, and that was the issue. I redownloaded codellama, which pulled the cached codellama:latest, and switched to codellama as the model, as tzachar suggested above.
Before going overboard and creating a minimal config like I did, make sure to do a very quick test using curl to debug.
curl http://localhost:11434/api/generate -d '{
"model": "codellama",
"prompt": "Why is the sky blue?"
}'
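If the model tag doesn't exist locally (as with my codellama:code), the API should answer with an error instead of a completion, which is much faster to spot than a silent failure inside nvim. The available tags can be checked and fixed with the standard Ollama CLI (shown here only as a reminder):
ollama list            # lists the tags actually installed, e.g. codellama:latest
ollama pull codellama  # pulls codellama:latest if it is missing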
and here is the not-so-minimal config with lazy, launched with nvim -u lazy_min.lua main.py:
-- Set leader key
vim.g.mapleader = " "
-- Install lazy.nvim if not already installed
local lazypath = vim.fn.stdpath("data") .. "/lazy/lazy.nvim"
if not vim.loop.fs_stat(lazypath) then
vim.fn.system({
"git",
"clone",
"--filter=blob:none",
"https://github.com/folke/lazy.nvim.git",
"--branch=stable", -- latest stable release
lazypath,
})
end
vim.opt.rtp:prepend(lazypath)
vim.lsp.set_log_level("debug")
vim.g.cmp_ai_debug = true
-- Install plugins
require("lazy").setup({
-- nvim-cmp and dependencies
{ "neovim/nvim-lspconfig" },
{ "hrsh7th/nvim-cmp" },
{ "hrsh7th/cmp-nvim-lsp" },
{ "tzachar/cmp-ai", dependencies = { "nvim-lua/plenary.nvim" } },
{ "L3MON4D3/LuaSnip" },
{ "saadparwaiz1/cmp_luasnip" },
{ "onsails/lspkind.nvim" },
{
"folke/noice.nvim",
event = "VeryLazy",
opts = {
lsp = {
override = {
["vim.lsp.util.convert_input_to_markdown_lines"] = true,
["vim.lsp.util.stylize_markdown"] = true,
["cmp.entry.get_documentation"] = true, -- requires hrsh7th/nvim-cmp
},
},
presets = {
bottom_search = true, -- use a classic bottom cmdline for search
command_palette = true, -- position the cmdline and popupmenu together
long_message_to_split = true, -- long messages will be sent to a split
inc_rename = false, -- enables an input dialog for inc-rename.nvim
lsp_doc_border = false, -- add a border to hover docs and signature help
},
},
dependencies = {
"MunifTanjim/nui.nvim",
"rcarriga/nvim-notify",
},
},
})
-- Basic Neovim options
vim.opt.completeopt = { "menu", "menuone", "noselect" }
-- Setup nvim-cmp
local cmp = require("cmp")
cmp.setup({
debug = true,
snippet = {
expand = function(args)
require("luasnip").lsp_expand(args.body)
end,
},
mapping = cmp.mapping.preset.insert({
["<C-b>"] = cmp.mapping.scroll_docs(-4),
["<C-f>"] = cmp.mapping.scroll_docs(4),
["<C-Space>"] = cmp.mapping.complete(),
["<C-e>"] = cmp.mapping.abort(),
["<CR>"] = cmp.mapping.confirm({ select = true }),
["<C-x>"] = cmp.mapping(
cmp.mapping.complete({
config = {
sources = cmp.config.sources({
{ name = "cmp_ai" },
}),
},
}),
{ "i" }
),
}),
sources = cmp.config.sources({
{ name = "cmp_ai" },
{ name = "nvim_lsp" },
{ name = "luasnip" },
}),
formatting = {
format = function(entry, vim_item)
local source_names = {
nvim_lsp = "[LSP]",
cmp_ai = "[AI]",
luasnip = "[Snippet]",
}
vim_item.menu = source_names[entry.source.name]
return vim_item
end,
},
})
-- Setup lspconfig
local capabilities = require("cmp_nvim_lsp").default_capabilities()
local lspconfig = require("lspconfig")
lspconfig.pyright.setup({
capabilities = capabilities,
})
local cmp_ai = require("cmp_ai.config")
cmp_ai:setup({
max_lines = 100,
provider = "Ollama",
provider_options = {
model = "codellama",
},
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = true,
ignored_file_types = {
},
})
local lspkind = require("lspkind")
local source_mapping = {
buffer = "[Buffer]",
nvim_lsp = "[LSP]",
nvim_lua = "[Lua]",
cmp_ai = "[AI]",
path = "[Path]",
}
require("cmp").setup({
sources = {
{ name = "nvim_lsp" },
{ name = "luasnip" },
{ name = "buffer" },
{ name = "cmp_ai" },
},
formatting = {
format = function(entry, vim_item)
-- if you have lspkind installed, you can use it like
-- in the following line:
vim_item.kind = lspkind.symbolic(vim_item.kind, { mode = "symbol" })
vim_item.menu = source_mapping[entry.source.name]
if entry.source.name == "cmp_ai" then
local detail = (entry.completion_item.labelDetails or {}).detail
vim_item.kind = ""
if detail and detail:find(".*%%.*") then
vim_item.kind = vim_item.kind .. " " .. detail
end
if (entry.completion_item.data or {}).multiline then
vim_item.kind = vim_item.kind .. " " .. "[ML]"
end
end
local maxwidth = 80
vim_item.abbr = string.sub(vim_item.abbr, 1, maxwidth)
return vim_item
end,
},
})
local compare = require("cmp.config.compare")
cmp.setup({
sorting = {
priority_weight = 2,
comparators = {
require("cmp_ai.compare"),
compare.offset,
compare.exact,
compare.score,
compare.recently_used,
compare.kind,
compare.sort_text,
compare.length,
compare.order,
},
},
})
I also tried to set it up, and it doesn't work for me. With Wireshark, I looked at the requests and responses going to the codellama model, and there is a problem either with the prompt or with the prompt + response. The entire file is sent as the prompt, and the response is not a code completion but an explanation of the code.
Here are examples
request
{
"options": {"temperature": 0.2},
"model": "codellama",
"stream": false,
"prompt": "<PRE> from django import forms\nfrom django.core.validators import RegexValidator\n\n\nphone_number_validator = RegexValidator...(removed for readability)...\n\nclass ContactForm(forms.Form):\n name = forms.CharField(max_length=255)\n email = forms.EmailField()\n message = forms.CharField(widget=forms.Textarea(), required=True)\n\nclass ContactForm(f <SUF>) <MID>"
}
response
{
"model":"codellama",
"created_at":"2024-07-14T13:07:24.099798Z",
"response":" ```\nfrom django import forms\nfrom django.core.validators import RegexValidator...(removed for readability)...\n\nclass ContactForm(forms.Form):\n name = forms.CharField(max_length=255)\n email = forms.EmailField()\n message = forms.CharField(widget=forms.Textarea(), required=True)\n ```\n \n This is a Django form that includes three fields: `title`, `about`, and `url`. The `phone` field has a validator to check if the input is a correct phone number, and the `barcode` field has a validator to check if the input is a correct barcode. The `ReviewForm` class includes only one field, which is a textarea for the review body. The `ContactForm` class includes three fields: `name`, `email`, and `message`.\n\nIt's important to note that in order to use this form in a Django view, you need to import it and create an instance of it. For example:\n```\nfrom .forms import NodeCreateForm\n\ndef my_view(request):\n form = NodeCreateForm()\n return render(request, 'my_template.html', {'form': form})\n```\nIn this example, the `NodeCreateForm` class is imported and an instance of it is created and passed to the template as a context variable called `form`. The template can then use this variable to display the form and handle the user input.",
"done":true,"done_reason":"stop",
"context":[...],
"total_duration":15330468416,
"load_duration":9605625,
"prompt_eval_count":420,
"prompt_eval_duration":108984000,
"eval_count":633,
"eval_duration":15163806000
}
I don't know exactly how it is supposed to work, but I would guess that it should reply with a code completion snippet rather than a description.
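For what it's worth, one way to check whether the model handles fill-in-the-middle at all, independently of the plugin, is to send a tiny prompt in the same <PRE>/<SUF>/<MID> format as the captured request above (just a sketch, the snippet is arbitrary):
curl http://localhost:11434/api/generate -d '{
"options": {"temperature": 0.2},
"model": "codellama",
"stream": false,
"prompt": "<PRE> def add(a, b):\n    return <SUF>\n <MID>"
}'
If even this returns prose instead of a short completion, the problem is probably the model/prompt combination rather than cmp-ai.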
Not sure what the issue is. Tabby seems to work fine.
anyone having luck with this?
👍
I just got qwen2.5-coder 7b working with cmp-ai and ollama so I thought I'd post some of my findings here:
- write your own prompt by passing provider_options.prompt (see the sketch after this list). Different models have different "special tokens" for delimiting the context and the "fill in the middle" location. For example, the qwen2.5-coder GitHub repo includes code snippets in its README that use these special tokens:
"<|fim_prefix|>"
"<|fim_middle|>"
"<|fim_suffix|>"
"<|fim_pad|>"
"<|repo_name|>"
"<|file_sep|>"
"<|im_start|>"
"<|im_end|>"
as opposed to the built-in ones in this plugin (<PRE>, <SUF>, <MID>). This is probably going to be model-specific but I think it's worth going into the readme because people use all kinds of different models with ollama.
if you want to include project/workspace information etc., this is also where you can do all that.
- base models are worth a shot. For qwen2.5-coder 7b, the instruct model kept overdoing things, such as repeating lines of code from the context and wrapping the response in markdown code block delimiters (```), while the responses from the base model tend to be cleaner and immediately usable (no excessive explanations, delimiters, etc.)
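As a minimal sketch (token names taken from the qwen2.5-coder README mentioned above, everything else as in a normal Ollama setup), the custom prompt could look roughly like this:
provider_options = {
  model = 'qwen2.5-coder:7b', -- tag depends on what you pulled, a base variant is worth trying
  prompt = function(lines_before, lines_after)
    -- assemble a fill-in-the-middle prompt with qwen2.5-coder's special tokens
    return '<|fim_prefix|>' .. lines_before .. '<|fim_suffix|>' .. lines_after .. '<|fim_middle|>'
  end,
},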
Do you mind showing the full config please?
If you can provide a writeup, including config examples, I'll add it to the README
I'll prepare a PR if that's okay.
@Davidyz Cool
I played around with this a bit and thought I'd share my findings. I thought having 24GB of VRAM on my GPU was going to help, but completions still took several seconds depending on the context, so I switched to qwen2.5-coder:7b, and while it technically "works", there are usability issues.
- There has to be a way to limit the response size, because most of the time it's just huge, like 80 lines. This can probably be tuned with the prompt, but there should be a sane default of 1-2 lines or 80 characters (see the curl sketch after this list for a possible Ollama-level knob).
- I wish there were a way to also tune the number of suggestions; maybe the first suggestion is not really what you were looking for.
- It would help a lot if there were a way to cancel the previous request, because sometimes it takes too much time. That would also help with running larger LLMs locally, since I wouldn't be wasting resources on queries that no longer matter.
- The real killer feature, whose absence makes this close to unusable for me, would be streaming results into cmp asynchronously, kind of like what copilot-cmp does. I haven't looked into how it does it, but LLM results take a while; with copilot-cmp, the results from all the other sources show up almost instantly, and when Copilot is done, its suggestions are added as well. That makes the wait much more bearable and avoids stalling all suggestions behind something like a fetching timeout in cmp.
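On the first point, I don't know whether cmp-ai can forward extra generation options, but at the Ollama level the num_predict option caps how many tokens a response may contain; a purely illustrative curl sketch:
curl http://localhost:11434/api/generate -d '{
"model": "qwen2.5-coder:7b-base-q4_K_M",
"stream": false,
"options": {"num_predict": 64},
"prompt": "<|fim_prefix|>def add(a, b):\n    return <|fim_suffix|>\n<|fim_middle|>"
}'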
Hopefully that helps
Here is my config for reference
{
'tzachar/cmp-ai',
dependencies = {
{ 'nvim-lua/plenary.nvim' },
},
config = function()
require('cmp_ai.config'):setup({
max_lines = 100,
provider = 'Ollama',
provider_options = {
model = 'qwen2.5-coder:7b-base-q4_K_M',
prompt = function(lines_before, lines_after)
return "<|fim_prefix|>" .. lines_before .. "<|fim_suffix|>" .. lines_after .. "<|fim_middle|>"
end,
auto_unload = false
},
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = true,
ignored_file_types = {
},
})
end,
},