mistral.rs
key must be a cuda tensor
Describe the bug
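Compiled with:
cargo install --path mistralrs-server --features "cuda flash-attn cudnn mkl"
Run with:
RUST_BACKTRACE=full ./mistralrs_server --interactive-mode --num-device-layers 13 --pa-ctxt-len 8192 gguf -m [path] -f Athene-70B-Q8_0.gguf
Prompt entered in interactive mode:
> Whooo are you?
The request fails with the "key must be a cuda tensor" error below, after which the engine thread panics: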
2024-07-29T20:08:08.867914Z ERROR mistralrs_core::engine: step - Model failed with error: WithBacktrace { inner: Msg("key must be a cuda tensor"), backtrace: Backtrace [{ fn: "candle_core::error::Error::bt" }, { fn: "mistralrs_paged_attn::backend::paged_attention::reshape_and_cache" }, { fn: "mistralrs_core::paged_attention::layers::paged_attention::PagedAttention::forward" }, { fn: "mistralrs_core::models::quantized_llama::ModelWeights::forward" }, { fn: "<mistralrs_core::pipeline::gguf::GGUFPipeline as mistralrs_core::pipeline::Pipeline>::forward_inputs" }, { fn: "mistralrs_core::pipeline::Pipeline::step::{{closure}}" }, { fn: "mistralrs_core::engine::Engine::run::{{closure}}" }, { fn: "tokio::runtime::park::CachedParkThread::block_on" }, { fn: "tokio::runtime::context::runtime::enter_runtime" }, { fn: "tokio::runtime::runtime::Runtime::block_on" }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace" }, { fn: "core::ops::function::FnOnce::call_once{{vtable.shim}}" }, { fn: "std::sys::pal::unix::thread::Thread::new::thread_start" }, { fn: "start_thread", file: "./nptl/pthread_create.c", line: 442 }, { fn: "__GI___clone3", file: "./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S", line: 81 }] }
2024-07-29T20:08:08.868132Z ERROR mistralrs_server::interactive_mode: Got a model error: "key must be a cuda tensor\n 0: candle_core::error::Error::bt\n 1: mistralrs_paged_attn::backend::paged_attention::reshape_and_cache\n 2: mistralrs_core::paged_attention::layers::paged_attention::PagedAttention::forward\n 3: mistralrs_core::models::quantized_llama::ModelWeights::forward\n 4: <mistralrs_core::pipeline::gguf::GGUFPipeline as mistralrs_core::pipeline::Pipeline>::forward_inputs\n 5: mistralrs_core::pipeline::Pipeline::step::{{closure}}\n 6: mistralrs_core::engine::Engine::run::{{closure}}\n 7: tokio::runtime::park::CachedParkThread::block_on\n 8: tokio::runtime::context::runtime::enter_runtime\n 9: tokio::runtime::runtime::Runtime::block_on\n 10: std::sys_common::backtrace::__rust_begin_short_backtrace\n 11: core::ops::function::FnOnce::call_once{{vtable.shim}}\n 12: std::sys::pal::unix::thread::Thread::new::thread_start\n 13: start_thread\n at ./nptl/pthread_create.c:442:8\n 14: __GI___clone3\n at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81\n", response: ChatCompletionResponse { id: "0", choices: [Choice { finish_reason: "error", index: 0, message: ResponseMessage { content: "", role: "assistant" }, logprobs: None }], created: 1722283688, model: "/media/j/72B264BFB2648A05", system_fingerprint: "local", object: "chat.completion", usage: Usage { completion_tokens: 0, prompt_tokens: 48, total_tokens: 48, avg_tok_per_sec: 282.35297, avg_prompt_tok_per_sec: inf, avg_compl_tok_per_sec: NaN, total_time_sec: 0.17, total_prompt_time_sec: 0.0, total_completion_time_sec: 0.0 } }
2024-07-29T20:08:08.894217Z ERROR mistralrs_core::engine: step - Model failed with error: WithBacktrace { inner: Msg("key must be a cuda tensor"), backtrace: Backtrace [{ fn: "candle_core::error::Error::bt" }, { fn: "mistralrs_paged_attn::backend::paged_attention::reshape_and_cache" }, { fn: "mistralrs_core::paged_attention::layers::paged_attention::PagedAttention::forward" }, { fn: "mistralrs_core::models::quantized_llama::ModelWeights::forward" }, { fn: "<mistralrs_core::pipeline::gguf::GGUFPipeline as mistralrs_core::pipeline::Pipeline>::forward_inputs" }, { fn: "mistralrs_core::pipeline::Pipeline::step::{{closure}}" }, { fn: "mistralrs_core::engine::Engine::run::{{closure}}" }, { fn: "tokio::runtime::park::CachedParkThread::block_on" }, { fn: "tokio::runtime::context::runtime::enter_runtime" }, { fn: "tokio::runtime::runtime::Runtime::block_on" }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace" }, { fn: "core::ops::function::FnOnce::call_once{{vtable.shim}}" }, { fn: "std::sys::pal::unix::thread::Thread::new::thread_start" }, { fn: "start_thread", file: "./nptl/pthread_create.c", line: 442 }, { fn: "__GI___clone3", file: "./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S", line: 81 }] }
thread '<unnamed>' panicked at mistralrs-core/src/engine/mod.rs:350:25:
called `Result::unwrap()` on an `Err` value: SendError { .. }
stack backtrace:
0: 0x58a97d4c7195 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h3692694645b1bb6a
1: 0x58a97d4f57ab - core::fmt::write::h5131d80b4c69b88d
2: 0x58a97d4c2b2f - std::io::Write::write_fmt::h1fb327a7d8b0eb36
3: 0x58a97d4c6f6e - std::sys_common::backtrace::print::h998d75b840f75a73
4: 0x58a97d4c84b9 - std::panicking::default_hook::{{closure}}::h18ec7fe6a38b9da0
5: 0x58a97d4c825a - std::panicking::default_hook::hfb3f22c2e4075a6a
6: 0x58a97d4c8953 - std::panicking::rust_panic_with_hook::h51af00bcb4660c4e
7: 0x58a97d4c8834 - std::panicking::begin_panic_handler::{{closure}}::h39f76aa863fbe8ce
8: 0x58a97d4c7659 - std::sys_common::backtrace::__rust_end_short_backtrace::h4d10fc2251b89840
9: 0x58a97d4c8567 - rust_begin_unwind
10: 0x58a97affab33 - core::panicking::panic_fmt::h319840fcbcd912ef
11: 0x58a97affb026 - core::result::unwrap_failed::haccb9aaa604e1e21
12: 0x58a97b5d5fbf - mistralrs_core::engine::Engine::run::{{closure}}::h29b06a6cd0d36e16
13: 0x58a97b5c9e30 - tokio::runtime::park::CachedParkThread::block_on::h12bcf0bdd8cc7851
14: 0x58a97b429573 - tokio::runtime::context::runtime::enter_runtime::h4d782a51f55d5d61
15: 0x58a97b5ca58c - tokio::runtime::runtime::Runtime::block_on::hca58a89cc6335ce2
16: 0x58a97b6714cd - std::sys_common::backtrace::__rust_begin_short_backtrace::h87eb596641e8ce48
17: 0x58a97b67456d - core::ops::function::FnOnce::call_once{{vtable.shim}}::h32e3e0c97ebb8183
18: 0x58a97d4ce0eb - std::sys::pal::unix::thread::Thread::new::thread_start::h3b8e81128811868f
19: 0x7b60f1094ac3 - start_thread
at ./nptl/pthread_create.c:442:8
20: 0x7b60f1126850 - __GI___clone3
at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
21: 0x0 - <unknown>
Latest commit or version
0.2.3