mistral.rs

key must be a cuda tensor

Open oldgithubman opened this issue 6 months ago • 4 comments

Describe the bug

Compiled with:

cargo install --path mistralrs-server --features "cuda flash-attn cudnn mkl"

Ran with:

RUST_BACKTRACE=full ./mistralrs_server --interactive-mode --num-device-layers 13 --pa-ctxt-len 8192 gguf -m [path] -f Athene-70B-Q8_0.gguf

> Whooo are you?
2024-07-29T20:08:08.867914Z ERROR mistralrs_core::engine: step - Model failed with error: WithBacktrace { inner: Msg("key must be a cuda tensor"), backtrace: Backtrace [{ fn: "candle_core::error::Error::bt" }, { fn: "mistralrs_paged_attn::backend::paged_attention::reshape_and_cache" }, { fn: "mistralrs_core::paged_attention::layers::paged_attention::PagedAttention::forward" }, { fn: "mistralrs_core::models::quantized_llama::ModelWeights::forward" }, { fn: "<mistralrs_core::pipeline::gguf::GGUFPipeline as mistralrs_core::pipeline::Pipeline>::forward_inputs" }, { fn: "mistralrs_core::pipeline::Pipeline::step::{{closure}}" }, { fn: "mistralrs_core::engine::Engine::run::{{closure}}" }, { fn: "tokio::runtime::park::CachedParkThread::block_on" }, { fn: "tokio::runtime::context::runtime::enter_runtime" }, { fn: "tokio::runtime::runtime::Runtime::block_on" }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace" }, { fn: "core::ops::function::FnOnce::call_once{{vtable.shim}}" }, { fn: "std::sys::pal::unix::thread::Thread::new::thread_start" }, { fn: "start_thread", file: "./nptl/pthread_create.c", line: 442 }, { fn: "__GI___clone3", file: "./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S", line: 81 }] }
2024-07-29T20:08:08.868132Z ERROR mistralrs_server::interactive_mode: Got a model error: "key must be a cuda tensor\n   0: candle_core::error::Error::bt\n   1: mistralrs_paged_attn::backend::paged_attention::reshape_and_cache\n   2: mistralrs_core::paged_attention::layers::paged_attention::PagedAttention::forward\n   3: mistralrs_core::models::quantized_llama::ModelWeights::forward\n   4: <mistralrs_core::pipeline::gguf::GGUFPipeline as mistralrs_core::pipeline::Pipeline>::forward_inputs\n   5: mistralrs_core::pipeline::Pipeline::step::{{closure}}\n   6: mistralrs_core::engine::Engine::run::{{closure}}\n   7: tokio::runtime::park::CachedParkThread::block_on\n   8: tokio::runtime::context::runtime::enter_runtime\n   9: tokio::runtime::runtime::Runtime::block_on\n  10: std::sys_common::backtrace::__rust_begin_short_backtrace\n  11: core::ops::function::FnOnce::call_once{{vtable.shim}}\n  12: std::sys::pal::unix::thread::Thread::new::thread_start\n  13: start_thread\n             at ./nptl/pthread_create.c:442:8\n  14: __GI___clone3\n             at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81\n", response: ChatCompletionResponse { id: "0", choices: [Choice { finish_reason: "error", index: 0, message: ResponseMessage { content: "", role: "assistant" }, logprobs: None }], created: 1722283688, model: "/media/j/72B264BFB2648A05", system_fingerprint: "local", object: "chat.completion", usage: Usage { completion_tokens: 0, prompt_tokens: 48, total_tokens: 48, avg_tok_per_sec: 282.35297, avg_prompt_tok_per_sec: inf, avg_compl_tok_per_sec: NaN, total_time_sec: 0.17, total_prompt_time_sec: 0.0, total_completion_time_sec: 0.0 } }
2024-07-29T20:08:08.894217Z ERROR mistralrs_core::engine: step - Model failed with error: WithBacktrace { inner: Msg("key must be a cuda tensor"), backtrace: Backtrace [{ fn: "candle_core::error::Error::bt" }, { fn: "mistralrs_paged_attn::backend::paged_attention::reshape_and_cache" }, { fn: "mistralrs_core::paged_attention::layers::paged_attention::PagedAttention::forward" }, { fn: "mistralrs_core::models::quantized_llama::ModelWeights::forward" }, { fn: "<mistralrs_core::pipeline::gguf::GGUFPipeline as mistralrs_core::pipeline::Pipeline>::forward_inputs" }, { fn: "mistralrs_core::pipeline::Pipeline::step::{{closure}}" }, { fn: "mistralrs_core::engine::Engine::run::{{closure}}" }, { fn: "tokio::runtime::park::CachedParkThread::block_on" }, { fn: "tokio::runtime::context::runtime::enter_runtime" }, { fn: "tokio::runtime::runtime::Runtime::block_on" }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace" }, { fn: "core::ops::function::FnOnce::call_once{{vtable.shim}}" }, { fn: "std::sys::pal::unix::thread::Thread::new::thread_start" }, { fn: "start_thread", file: "./nptl/pthread_create.c", line: 442 }, { fn: "__GI___clone3", file: "./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S", line: 81 }] }
thread '<unnamed>' panicked at mistralrs-core/src/engine/mod.rs:350:25:
called `Result::unwrap()` on an `Err` value: SendError { .. }
stack backtrace:
   0:     0x58a97d4c7195 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h3692694645b1bb6a
   1:     0x58a97d4f57ab - core::fmt::write::h5131d80b4c69b88d
   2:     0x58a97d4c2b2f - std::io::Write::write_fmt::h1fb327a7d8b0eb36
   3:     0x58a97d4c6f6e - std::sys_common::backtrace::print::h998d75b840f75a73
   4:     0x58a97d4c84b9 - std::panicking::default_hook::{{closure}}::h18ec7fe6a38b9da0
   5:     0x58a97d4c825a - std::panicking::default_hook::hfb3f22c2e4075a6a
   6:     0x58a97d4c8953 - std::panicking::rust_panic_with_hook::h51af00bcb4660c4e
   7:     0x58a97d4c8834 - std::panicking::begin_panic_handler::{{closure}}::h39f76aa863fbe8ce
   8:     0x58a97d4c7659 - std::sys_common::backtrace::__rust_end_short_backtrace::h4d10fc2251b89840
   9:     0x58a97d4c8567 - rust_begin_unwind
  10:     0x58a97affab33 - core::panicking::panic_fmt::h319840fcbcd912ef
  11:     0x58a97affb026 - core::result::unwrap_failed::haccb9aaa604e1e21
  12:     0x58a97b5d5fbf - mistralrs_core::engine::Engine::run::{{closure}}::h29b06a6cd0d36e16
  13:     0x58a97b5c9e30 - tokio::runtime::park::CachedParkThread::block_on::h12bcf0bdd8cc7851
  14:     0x58a97b429573 - tokio::runtime::context::runtime::enter_runtime::h4d782a51f55d5d61
  15:     0x58a97b5ca58c - tokio::runtime::runtime::Runtime::block_on::hca58a89cc6335ce2
  16:     0x58a97b6714cd - std::sys_common::backtrace::__rust_begin_short_backtrace::h87eb596641e8ce48
  17:     0x58a97b67456d - core::ops::function::FnOnce::call_once{{vtable.shim}}::h32e3e0c97ebb8183
  18:     0x58a97d4ce0eb - std::sys::pal::unix::thread::Thread::new::thread_start::h3b8e81128811868f
  19:     0x7b60f1094ac3 - start_thread
                               at ./nptl/pthread_create.c:442:8
  20:     0x7b60f1126850 - __GI___clone3
                               at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
  21:                0x0 - <unknown>

Latest commit or version

0.2.3

oldgithubman avatar Jul 29 '24 20:07 oldgithubman