llm-ls
Can't process response from llama.cpp server
I have nvim/llm working with ollama, using llm-ls-x86_64-unknown-linux-gnu-0.5.3. I tried to switch the config to the OpenAI API so I could connect to a llama.cpp server instead, because llama.cpp supports my AMD GPU and ollama does not.
I can see in Wireshark that the request is sent and that the llama.cpp server sends back a successful response. However, I don't get any completion in nvim, which is probably caused by llm-ls failing to process the response.
I get this error in nvim: [LLM] serde json error: data did not match any variant of untagged enum OpenAIAPIResponse
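For what it's worth, that serde message means an #[serde(untagged)] enum tried every variant and none matched the shape of the JSON. Here is a minimal sketch that reproduces the same error; the types below are hypothetical stand-ins, not llm-ls's actual definitions:

```rust
use serde::Deserialize;

// Hypothetical stand-in for the OpenAI completions "choices" entry.
#[derive(Deserialize, Debug)]
struct Choice {
    text: String,
}

// Hypothetical stand-in for llm-ls's OpenAIAPIResponse; I haven't
// checked the real definition, only that the error names an
// untagged enum of this name.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum OpenAIAPIResponse {
    // OpenAI-style completions keep the text in a "choices" array.
    Completion { choices: Vec<Choice> },
    // OpenAI-style errors come wrapped in an "error" object.
    Error { error: serde_json::Value },
}

fn main() {
    // llama.cpp's native /completion reply keeps the text in a
    // top-level "content" field instead, so no variant matches:
    let native = r#"{ "content": "return a * b; <EOT>", "stop": true }"#;
    let err = serde_json::from_str::<OpenAIAPIResponse>(native).unwrap_err();
    // prints: data did not match any variant of untagged enum OpenAIAPIResponse
    println!("{err}");
}
```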
request:
{
  "model": "models/codellama-7b.Q4_K_M.gguf",
  "options": {
    "temperature": 0.2,
    "top_p": 0.95
  },
  "parameters": {
    "max_new_tokens": 60,
    "temperature": 0.2,
    "top_p": 0.95
  },
  "prompt": "<PRE> #include <stdio.h>\n\nfloat multiply(float a, float b)\n{\n <SUF>\n}\n\nint main(int argc, char *argv[])\n{\n return 0;\n}\n\n\n <MID>",
  "stream": false
}
response:
{
  "content": "return a * b; <EOT>",
  "id_slot": 0,
  "stop": true,
  "model": "models/codellama-7b.Q4_K_M.gguf",
  "tokens_predicted": 6,
  "tokens_evaluated": 54,
  "generation_settings": {
    "n_ctx": 512,
    "n_predict": -1,
    "model": "models/codellama-7b.Q4_K_M.gguf",
    "seed": 4294967295,
    "temperature": 0.800000011920929,
    "dynatemp_range": 0,
    "dynatemp_exponent": 1,
    "top_k": 40,
    "top_p": 0.949999988079071,
    "min_p": 0.05000000074505806,
    "tfs_z": 1,
    "typical_p": 1,
    "repeat_last_n": 64,
    "repeat_penalty": 1,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "penalty_prompt_tokens": [],
    "use_penalty_prompt_tokens": false,
    "mirostat": 0,
    "mirostat_tau": 5,
    "mirostat_eta": 0.10000000149011612,
    "penalize_nl": false,
    "stop": [],
    "n_keep": 0,
    "n_discard": 0,
    "ignore_eos": false,
    "stream": false,
    "logit_bias": [],
    "n_probs": 0,
    "min_keep": 0,
    "grammar": "",
    "samplers": [
      "top_k",
      "tfs_z",
      "typical_p",
      "top_p",
      "min_p",
      "temperature"
    ]
  },
  "prompt": "<PRE> #include <stdio.h>\n\nfloat multiply(float a, float b)\n{\n <SUF>\n}\n\nint main(int argc, char *argv[])\n{\n return 0;\n}\n\n\n <MID>",
  "truncated": false,
  "stopped_eos": true,
  "stopped_word": false,
  "stopped_limit": false,
  "stopping_word": "",
  "tokens_cached": 59,
  "timings": {
    "prompt_n": 54,
    "prompt_ms": 601.562,
    "prompt_per_token_ms": 11.140037037037038,
    "prompt_per_second": 89.76630837719138,
    "predicted_n": 6,
    "predicted_ms": 315.451,
    "predicted_per_token_ms": 52.57516666666667,
    "predicted_per_second": 19.020386684461293
  }
}
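In case it helps narrow things down: the response above looks like llama.cpp's native /completion schema rather than an OpenAI-shaped one, and it deserializes cleanly against a struct with those field names. A minimal sketch, with a hypothetical struct covering just a few of the fields shown above:

```rust
use serde::Deserialize;

// Hypothetical struct for a subset of the native llama.cpp
// /completion response captured above.
#[derive(Deserialize, Debug)]
struct LlamaCppCompletion {
    content: String,
    stop: bool,
    model: String,
    tokens_predicted: u32,
}

fn main() {
    let body = r#"{
        "content": "return a * b; <EOT>",
        "id_slot": 0,
        "stop": true,
        "model": "models/codellama-7b.Q4_K_M.gguf",
        "tokens_predicted": 6
    }"#;
    // serde ignores unknown fields on plain structs by default, so the
    // full response above would deserialize the same way.
    let parsed: LlamaCppCompletion = serde_json::from_str(body).unwrap();
    println!("completion text: {}", parsed.content);
}
```

So the payload itself seems well-formed; it just doesn't have the OpenAI shape that llm-ls appears to expect.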
I hope you can fix that, or tell me what I did wrong if it's my fault.