llm.nvim icon indicating copy to clipboard operation
llm.nvim copied to clipboard

Extremely slow on Completion

Open gzfrozen opened this issue 10 months ago • 0 comments

Extremely slow on my Mac use M2 chip. Only use a very light startcoder2 3b model.

I can see a very high GPU or CPU usage and Idle wake Ups. Many times the ollama process became so heavy that a normal ollama quite is not working and need to be force ended.

image

Not quiet understand what happened. Here is the log of ollama. I cannot understand it but maybe helpful.

{"function":"initialize","level":"INFO","line":444,"msg":"initializing slots","n_slots":1,"tid":"0x17074f000","timestamp":1712133604}
{"function":"initialize","level":"INFO","line":456,"msg":"new slot","n_ctx_slot":2048,"slot_id":0,"tid":"0x17074f000","timestamp":1712133604}
{"function":"update_slots","level":"INFO","line":1572,"msg":"all slots are idle and system prompt is empty, clear the KV cache","tid":"0x176287000","timestamp":1712133604}
time=2024-04-03T17:40:04.803+09:00 level=INFO source=dyn_ext_server.go:159 msg="Starting llama main loop"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133604}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":0,"n_past_se":0,"n_prompt_tokens_processed":438,"slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133604}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":0,"slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133604}
[GIN] 2024/04/03 - 17:40:06 | 200 |  1.903387625s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 |  1.244827417s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 |  1.041619375s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 |  838.446166ms |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:06 | 200 |   412.63925ms |       127.0.0.1 | POST     "/api/generate"
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":440,"n_ctx":2048,"n_past":439,"n_system_tokens":0,"slot_id":0,"task_id":0,"tid":"0x176287000","timestamp":1712133606,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133606}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":405,"slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133606}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133606}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1347.64 ms /   405 tokens (    3.33 ms per token,   300.53 tokens per second)","n_prompt_tokens_processed":405,"n_tokens_second":300.52513985549564,"slot_id":0,"t_prompt_processing":1347.641,"t_token":3.327508641975309,"task_id":4,"tid":"0x176287000","timestamp":1712133607}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     198.78 ms /     7 runs   (   28.40 ms per token,    35.21 tokens per second)","n_decoded":7,"n_tokens_second":35.21392459189576,"slot_id":0,"t_token":28.39785714285714,"t_token_generation":198.785,"task_id":4,"tid":"0x176287000","timestamp":1712133607}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1546.43 ms","slot_id":0,"t_prompt_processing":1347.641,"t_token_generation":198.785,"t_total":1546.4260000000002,"task_id":4,"tid":"0x176287000","timestamp":1712133607}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":444,"n_ctx":2048,"n_past":443,"n_system_tokens":0,"slot_id":0,"task_id":4,"tid":"0x176287000","timestamp":1712133607,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133607}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":31,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133607}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":31,"slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133607}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1331.61 ms /   406 tokens (    3.28 ms per token,   304.89 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":304.89430455937145,"slot_id":0,"t_prompt_processing":1331.609,"t_token":3.279825123152709,"task_id":6,"tid":"0x176287000","timestamp":1712133609}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     162.88 ms /     6 runs   (   27.15 ms per token,    36.84 tokens per second)","n_decoded":6,"n_tokens_second":36.83670900841719,"slot_id":0,"t_token":27.146833333333333,"t_token_generation":162.881,"task_id":6,"tid":"0x176287000","timestamp":1712133609}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1494.49 ms","slot_id":0,"t_prompt_processing":1331.609,"t_token_generation":162.881,"t_total":1494.49,"task_id":6,"tid":"0x176287000","timestamp":1712133609}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":443,"n_ctx":2048,"n_past":442,"n_system_tokens":0,"slot_id":0,"task_id":6,"tid":"0x176287000","timestamp":1712133609,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133609}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":31,"n_past_se":0,"n_prompt_tokens_processed":405,"slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133609}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":31,"slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133609}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1327.47 ms /   405 tokens (    3.28 ms per token,   305.09 tokens per second)","n_prompt_tokens_processed":405,"n_tokens_second":305.0904913463531,"slot_id":0,"t_prompt_processing":1327.475,"t_token":3.277716049382716,"task_id":8,"tid":"0x176287000","timestamp":1712133610}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     260.96 ms /     9 runs   (   29.00 ms per token,    34.49 tokens per second)","n_decoded":9,"n_tokens_second":34.48791198684861,"slot_id":0,"t_token":28.99566666666667,"t_token_generation":260.961,"task_id":8,"tid":"0x176287000","timestamp":1712133610}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1588.44 ms","slot_id":0,"t_prompt_processing":1327.475,"t_token_generation":260.961,"t_total":1588.436,"task_id":8,"tid":"0x176287000","timestamp":1712133610}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":445,"n_ctx":2048,"n_past":444,"n_system_tokens":0,"slot_id":0,"task_id":8,"tid":"0x176287000","timestamp":1712133610,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133610}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":30,"n_past_se":0,"n_prompt_tokens_processed":407,"slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133610}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":30,"slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133610}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1329.28 ms /   407 tokens (    3.27 ms per token,   306.18 tokens per second)","n_prompt_tokens_processed":407,"n_tokens_second":306.1817109464099,"slot_id":0,"t_prompt_processing":1329.276,"t_token":3.266034398034398,"task_id":10,"tid":"0x176287000","timestamp":1712133612}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     130.75 ms /     5 runs   (   26.15 ms per token,    38.24 tokens per second)","n_decoded":5,"n_tokens_second":38.24004038148264,"slot_id":0,"t_token":26.150599999999997,"t_token_generation":130.753,"task_id":10,"tid":"0x176287000","timestamp":1712133612}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1460.03 ms","slot_id":0,"t_prompt_processing":1329.276,"t_token_generation":130.753,"t_total":1460.029,"task_id":10,"tid":"0x176287000","timestamp":1712133612}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":442,"n_ctx":2048,"n_past":441,"n_system_tokens":0,"slot_id":0,"task_id":10,"tid":"0x176287000","timestamp":1712133612,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133612}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133612}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133612}
[GIN] 2024/04/03 - 17:40:13 | 200 |    7.4575515s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:13 | 200 |  6.892029459s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:13 | 200 |  6.579324209s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/04/03 - 17:40:13 | 200 |   6.19803325s |       127.0.0.1 | POST     "/api/generate"
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":440,"n_ctx":2048,"n_past":439,"n_system_tokens":0,"slot_id":0,"task_id":12,"tid":"0x176287000","timestamp":1712133613,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133613}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":31,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133613}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":31,"slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133613}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1328.01 ms /   406 tokens (    3.27 ms per token,   305.72 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.71966861795136,"slot_id":0,"t_prompt_processing":1328.014,"t_token":3.2709704433497535,"task_id":48,"tid":"0x176287000","timestamp":1712133615}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     197.04 ms /     7 runs   (   28.15 ms per token,    35.53 tokens per second)","n_decoded":7,"n_tokens_second":35.526502771067214,"slot_id":0,"t_token":28.148,"t_token_generation":197.036,"task_id":48,"tid":"0x176287000","timestamp":1712133615}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1525.05 ms","slot_id":0,"t_prompt_processing":1328.014,"t_token_generation":197.036,"t_total":1525.05,"task_id":48,"tid":"0x176287000","timestamp":1712133615}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":444,"n_ctx":2048,"n_past":443,"n_system_tokens":0,"slot_id":0,"task_id":48,"tid":"0x176287000","timestamp":1712133615,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133615}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133615}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133615}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1327.23 ms /   406 tokens (    3.27 ms per token,   305.90 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.90025843297695,"slot_id":0,"t_prompt_processing":1327.23,"t_token":3.269039408866995,"task_id":50,"tid":"0x176287000","timestamp":1712133616}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     262.41 ms /     9 runs   (   29.16 ms per token,    34.30 tokens per second)","n_decoded":9,"n_tokens_second":34.29760412180985,"slot_id":0,"t_token":29.156555555555556,"t_token_generation":262.409,"task_id":50,"tid":"0x176287000","timestamp":1712133616}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1589.64 ms","slot_id":0,"t_prompt_processing":1327.23,"t_token_generation":262.409,"t_total":1589.6390000000001,"task_id":50,"tid":"0x176287000","timestamp":1712133616}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":447,"n_ctx":2048,"n_past":446,"n_system_tokens":0,"slot_id":0,"task_id":50,"tid":"0x176287000","timestamp":1712133616,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133616}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":32,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133616}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":32,"slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133616}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1328.47 ms /   406 tokens (    3.27 ms per token,   305.61 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.61380952882786,"slot_id":0,"t_prompt_processing":1328.474,"t_token":3.2721034482758617,"task_id":52,"tid":"0x176287000","timestamp":1712133619}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =    1008.81 ms /    32 runs   (   31.53 ms per token,    31.72 tokens per second)","n_decoded":32,"n_tokens_second":31.72044769446865,"slot_id":0,"t_token":31.52540625,"t_token_generation":1008.813,"task_id":52,"tid":"0x176287000","timestamp":1712133619}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    2337.29 ms","slot_id":0,"t_prompt_processing":1328.474,"t_token_generation":1008.813,"t_total":2337.287,"task_id":52,"tid":"0x176287000","timestamp":1712133619}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":470,"n_ctx":2048,"n_past":469,"n_system_tokens":0,"slot_id":0,"task_id":52,"tid":"0x176287000","timestamp":1712133619,"truncated":false}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133619}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":33,"n_past_se":0,"n_prompt_tokens_processed":406,"slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133619}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":33,"slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133619}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time     =    1328.28 ms /   406 tokens (    3.27 ms per token,   305.66 tokens per second)","n_prompt_tokens_processed":406,"n_tokens_second":305.6593659751437,"slot_id":0,"t_prompt_processing":1328.276,"t_token":3.2716157635467984,"task_id":54,"tid":"0x176287000","timestamp":1712133620}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time =     327.84 ms /    11 runs   (   29.80 ms per token,    33.55 tokens per second)","n_decoded":11,"n_tokens_second":33.5533620468771,"slot_id":0,"t_token":29.803272727272727,"t_token_generation":327.836,"task_id":54,"tid":"0x176287000","timestamp":1712133620}
{"function":"print_timings","level":"INFO","line":295,"msg":"          total time =    1656.11 ms","slot_id":0,"t_prompt_processing":1328.276,"t_token_generation":327.836,"t_total":1656.112,"task_id":54,"tid":"0x176287000","timestamp":1712133620}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":450,"n_ctx":2048,"n_past":449,"n_system_tokens":0,"slot_id":0,"task_id":54,"tid":"0x176287000","timestamp":1712133620,"truncated":false}
[GIN] 2024/04/03 - 17:40:20 | 200 | 13.134449416s |       127.0.0.1 | POST     "/api/generate"

gzfrozen avatar Apr 03 '24 08:04 gzfrozen