llm.nvim
llm.nvim copied to clipboard
Extremely slow on Completion
Extremely slow on my Mac use M2 chip. Only use a very light startcoder2 3b model.
I can see a very high GPU or CPU usage and Idle wake Ups. Many times the ollama process became so heavy that a normal ollama quite is not working and need to be force ended.
Not quiet understand what happened. Here is the log of ollama. I cannot understand it but maybe helpful.
{"function":"initialize","level":"INFO","line":444,"msg":"initializing slots","n_slots":1,"tid":"0x17074f000","timestamp":1712133604}
{"function":"initialize","level":"INFO","line":456,"msg":"new slot","n_ctx_slot":2048,"slot_id":0,"tid":"0x17074f000","timestamp":1712133604}
{"function":"update_slots","level":"INFO","line":1572,"msg":"all slots are idle and system prompt is empty, clear the KV cache","tid":"0x176287000","timestamp":1712133604}
time=2024-04-03T17:40:04.803+09:00 level=INFO source=dyn_ext_server.go:159 msg="Starting llama main loop"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133604}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":0,"n_past_se":0,"n_prompt_tokens_processed":438,"slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133604}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":0,"slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133604}
[GIN] 2024/04/03 - 17:40:06 | 200 | 1.903387625s | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 | 1.244827417s | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 | 1.041619375s | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 | 838.446166ms | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 | 412.63925ms | 127.0.0.1 | POST "/api/generate"
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":440,"n_ctx":2048,"n_past":439,"n_system_tokens":0,"slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133606,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133606}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":405,"slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133606}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133606}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1347.64 ms / 405 tokens ( 3.33 ms per token, 300.53 tokens per second)","n_prompt_tokens_processed":405,"n_tokens_second":300.52513985549564,"slot_id":0,"t_prompt_processing":1347.641,"t_token":3.327508641975309,"task_id":4,"tid":"0x176287000","timestamp":1712133607}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 198.78 ms / 7 runs ( 28.40 ms per token, 35.21 tokens per second)","n_decoded":7,"n_tokens_second":35.21392459189576,"slot_id":0,"t_token":28.39785714285714,"t_token_generation":198.785,"task_id":4,"tid":"0x176287000","timestamp":1712133607}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1546.43 ms","slot_id":0,"t_prompt_processing":1347.641,"t_token_generation":198.785,"t_total":1546.4260000000002,"task_id":4,"tid":"0x176287000","timestamp":1712133607}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":444,"n_ctx":2048,"n_past":443,"n_system_tokens":0,"slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133607,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133607}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":31,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133607}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":31,"slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133607}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1331.61 ms / 406 tokens ( 3.28 ms per token, 304.89 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":304.89430455937145,"slot_id":0,"t_prompt_processing":1331.609,"t_token":3.279825123152709,"task_id":6,"tid":"0x176287000","timestamp":1712133609}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 162.88 ms / 6 runs ( 27.15 ms per token, 36.84 tokens per second)","n_decoded":6,"n_tokens_second":36.83670900841719,"slot_id":0,"t_token":27.146833333333333,"t_token_generation":162.881,"task_id":6,"tid":"0x176287000","timestamp":1712133609}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1494.49 ms","slot_id":0,"t_prompt_processing":1331.609,"t_token_generation":162.881,"t_total":1494.49,"task_id":6,"tid":"0x176287000","timestamp":1712133609}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":443,"n_ctx":2048,"n_past":442,"n_system_tokens":0,"slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133609,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133609}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":31,"n_past_se":0,"n_prompt_tokens_processed":405,"slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133609}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":31,"slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133609}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1327.47 ms / 405 tokens ( 3.28 ms per token, 305.09 tokens per second)","n_prompt_tokens_processed":405,"n_tokens_second":305.0904913463531,"slot_id":0,"t_prompt_processing":1327.475,"t_token":3.277716049382716,"task_id":8,"tid":"0x176287000","timestamp":1712133610}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 260.96 ms / 9 runs ( 29.00 ms per token, 34.49 tokens per second)","n_decoded":9,"n_tokens_second":34.48791198684861,"slot_id":0,"t_token":28.99566666666667,"t_token_generation":260.961,"task_id":8,"tid":"0x176287000","timestamp":1712133610}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1588.44 ms","slot_id":0,"t_prompt_processing":1327.475,"t_token_generation":260.961,"t_total":1588.436,"task_id":8,"tid":"0x176287000","timestamp":1712133610}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":445,"n_ctx":2048,"n_past":444,"n_system_tokens":0,"slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133610,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133610}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":30,"n_past_se":0,"n_prompt_tokens_processed":407,"slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133610}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":30,"slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133610}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1329.28 ms / 407 tokens ( 3.27 ms per token, 306.18 tokens per second)","n_prompt_tokens_processed":407,"n_tokens_second":306.1817109464099,"slot_id":0,"t_prompt_processing":1329.276,"t_token":3.266034398034398,"task_id":10,"tid":"0x176287000","timestamp":1712133612}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 130.75 ms / 5 runs ( 26.15 ms per token, 38.24 tokens per second)","n_decoded":5,"n_tokens_second":38.24004038148264,"slot_id":0,"t_token":26.150599999999997,"t_token_generation":130.753,"task_id":10,"tid":"0x176287000","timestamp":1712133612}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1460.03 ms","slot_id":0,"t_prompt_processing":1329.276,"t_token_generation":130.753,"t_total":1460.029,"task_id":10,"tid":"0x176287000","timestamp":1712133612}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":442,"n_ctx":2048,"n_past":441,"n_system_tokens":0,"slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133612,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133612}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133612}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133612}
[GIN] 2024/04/03 - 17:40:13 | 200 | 7.4575515s | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:13 | 200 | 6.892029459s | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:13 | 200 | 6.579324209s | 127.0.0.1 | POST "/api/generate"
[GIN] 2024/04/03 - 17:40:13 | 200 | 6.19803325s | 127.0.0.1 | POST "/api/generate"
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":440,"n_ctx":2048,"n_past":439,"n_system_tokens":0,"slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133613,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133613}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":31,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133613}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":31,"slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133613}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1328.01 ms / 406 tokens ( 3.27 ms per token, 305.72 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.71966861795136,"slot_id":0,"t_prompt_processing":1328.014,"t_token":3.2709704433497535,"task_id":48,"tid":"0x176287000","timestamp":1712133615}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 197.04 ms / 7 runs ( 28.15 ms per token, 35.53 tokens per second)","n_decoded":7,"n_tokens_second":35.526502771067214,"slot_id":0,"t_token":28.148,"t_token_generation":197.036,"task_id":48,"tid":"0x176287000","timestamp":1712133615}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1525.05 ms","slot_id":0,"t_prompt_processing":1328.014,"t_token_generation":197.036,"t_total":1525.05,"task_id":48,"tid":"0x176287000","timestamp":1712133615}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":444,"n_ctx":2048,"n_past":443,"n_system_tokens":0,"slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133615,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133615}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133615}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133615}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1327.23 ms / 406 tokens ( 3.27 ms per token, 305.90 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.90025843297695,"slot_id":0,"t_prompt_processing":1327.23,"t_token":3.269039408866995,"task_id":50,"tid":"0x176287000","timestamp":1712133616}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 262.41 ms / 9 runs ( 29.16 ms per token, 34.30 tokens per second)","n_decoded":9,"n_tokens_second":34.29760412180985,"slot_id":0,"t_token":29.156555555555556,"t_token_generation":262.409,"task_id":50,"tid":"0x176287000","timestamp":1712133616}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1589.64 ms","slot_id":0,"t_prompt_processing":1327.23,"t_token_generation":262.409,"t_total":1589.6390000000001,"task_id":50,"tid":"0x176287000","timestamp":1712133616}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":447,"n_ctx":2048,"n_past":446,"n_system_tokens":0,"slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133616,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133616}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133616}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133616}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1328.47 ms / 406 tokens ( 3.27 ms per token, 305.61 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.61380952882786,"slot_id":0,"t_prompt_processing":1328.474,"t_token":3.2721034482758617,"task_id":52,"tid":"0x176287000","timestamp":1712133619}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 1008.81 ms / 32 runs ( 31.53 ms per token, 31.72 tokens per second)","n_decoded":32,"n_tokens_second":31.72044769446865,"slot_id":0,"t_token":31.52540625,"t_token_generation":1008.813,"task_id":52,"tid":"0x176287000","timestamp":1712133619}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 2337.29 ms","slot_id":0,"t_prompt_processing":1328.474,"t_token_generation":1008.813,"t_total":2337.287,"task_id":52,"tid":"0x176287000","timestamp":1712133619}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":470,"n_ctx":2048,"n_past":469,"n_system_tokens":0,"slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133619,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133619}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":33,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133619}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":33,"slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133619}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 1328.28 ms / 406 tokens ( 3.27 ms per token, 305.66 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.6593659751437,"slot_id":0,"t_prompt_processing":1328.276,"t_token":3.2716157635467984,"task_id":54,"tid":"0x176287000","timestamp":1712133620}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 327.84 ms / 11 runs ( 29.80 ms per token, 33.55 tokens per second)","n_decoded":11,"n_tokens_second":33.5533620468771,"slot_id":0,"t_token":29.803272727272727,"t_token_generation":327.836,"task_id":54,"tid":"0x176287000","timestamp":1712133620}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1656.11 ms","slot_id":0,"t_prompt_processing":1328.276,"t_token_generation":327.836,"t_total":1656.112,"task_id":54,"tid":"0x176287000","timestamp":1712133620}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":450,"n_ctx":2048,"n_past":449,"n_system_tokens":0,"slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133620,"truncated":false}
[GIN] 2024/04/03 - 17:40:20 | 200 | 13.134449416s | 127.0.0.1 | POST "/api/generate"