lorax
lorax copied to clipboard
Important: On the latest main, the server cannot serve more than one user at a time
System Info
I hit this error when more than one user sends requests to the server at the same time (I tried running the previous image version and it still works fine)
ID not found in entries. This is a bug.
stack backtrace:
0: 0x55acc1cd4f5c - std::backtrace_rs::backtrace::libunwind::trace::h67a838aed1f4d6ec
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
1: 0x55acc1cd4f5c - std::backtrace_rs::backtrace::trace_unsynchronized::h1d1786bb1962baf8
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x55acc1cd4f5c - std::sys_common::backtrace::_print_fmt::h5a0b1f807a002d23
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:67:5
3: 0x55acc1cd4f5c - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hf84ab6ad0b91784c
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:44:22
4: 0x55acc1d01d8c - core::fmt::rt::Argument::fmt::h28f463bd1fdabed5
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/fmt/rt.rs:138:9
5: 0x55acc1d01d8c - core::fmt::write::ha37c23b175e921b3
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/fmt/mod.rs:1114:21
6: 0x55acc1cd17ce - std::io::Write::write_fmt::haa1b000741bcbbe1
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/io/mod.rs:1763:15
7: 0x55acc1cd4d44 - std::sys_common::backtrace::_print::h1ff1030b04dfb157
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:47:5
8: 0x55acc1cd4d44 - std::sys_common::backtrace::print::hb982056c6f29541c
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:34:9
9: 0x55acc1cd6473 - std::panicking::default_hook::{{closure}}::h11f92f82c62fbd68
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:272:22
10: 0x55acc1cd6194 - std::panicking::default_hook::hb8810fe276772c66
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:292:9
11: 0x55acc1cd69f5 - std::panicking::rust_panic_with_hook::hd2f0efd2fec86cb0
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:731:13
12: 0x55acc1cd68f1 - std::panicking::begin_panic_handler::{{closure}}::h3651b7fc4f61d784
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:609:13
13: 0x55acc1cd5486 - std::sys_common::backtrace::__rust_end_short_backtrace::hbc468e4b98c7ae04
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:170:18
14: 0x55acc1cd6642 - rust_begin_unwind
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:597:5
15: 0x55acc12e1075 - core::panicking::panic_fmt::h979245e2fdb2fabd
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:72:14
16: 0x55acc12e1033 - core::panicking::panic_display::h9b355c58fd35af38
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:168:5
17: 0x55acc12e1033 - core::panicking::panic_str::h187a5146d72e7d2f
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:152:5
18: 0x55acc12e1033 - core::option::expect_failed::h7cdfa49208a82a89
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/option.rs:1988:5
19: 0x55acc172ba7d - lorax_router::infer::filter_send_generations::{{closure}}::h6d30e692d85e9a5c
20: 0x55acc172ba7d - core::iter::traits::iterator::Iterator::for_each::call::{{closure}}::h68eb01f7ecc7097a
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:854:29
21: 0x55acc172ba7d - core::iter::traits::iterator::Iterator::fold::h7ab3a222ce402821
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:2639:21
22: 0x55acc172ba7d - core::iter::traits::iterator::Iterator::for_each::h1a17e8d270d54052
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:857:9
23: 0x55acc172d276 - lorax_router::infer::filter_send_generations::h687c3ed4fb0824e8
at /usr/src/router/src/infer.rs:761:5
24: 0x55acc17618c6 - lorax_router::infer::prefill::{{closure}}::{{closure}}::h91b7e7435144c2ae
at /usr/src/router/src/infer.rs:605:13
25: 0x55acc175365c - <tracing::instrument::Instrumented<T> as core::future::future::Future>::poll::he0b835ce68a0641f
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tracing-0.1.40/src/instrument.rs:321:9
26: 0x55acc175365c - lorax_router::infer::prefill::{{closure}}::he668e13682ad69f2
at /usr/src/router/src/infer.rs:589:1
27: 0x55acc175365c - <tracing::instrument::Instrumented<T> as core::future::future::Future>::poll::h0b251984432d2f7c
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tracing-0.1.40/src/instrument.rs:321:9
28: 0x55acc17366bd - <lorax_router::batch::GenerateBatchEntries as lorax_router::batch::BatchEntries>::process_first::{{closure}}::h0a93f83c73861b5c
at /usr/src/router/src/batch.rs:315:10
29: 0x55acc178db41 - <core::pin::Pin<P> as core::future::future::Future>::poll::h47267cc93a805788
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/future/future.rs:125:9
30: 0x55acc1766ef6 - lorax_router::infer::batching_task::{{closure}}::h6c42ca99e514e067
at /usr/src/router/src/infer.rs:549:26
31: 0x55acc176602f - tokio::runtime::task::core::Core<T,S>::poll::{{closure}}::h6fa682b46516d869
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/core.rs:328:17
32: 0x55acc176602f - tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut::h3467b54ed6d4a3fa
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/loom/std/unsafe_cell.rs:16:9
33: 0x55acc176602f - tokio::runtime::task::core::Core<T,S>::poll::h2227f544428749a2
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/core.rs:317:30
34: 0x55acc16ffbaf - std::panicking::try::do_call::hded8ccfdb06a0f73
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:504:40
35: 0x55acc16ffbaf - std::panicking::try::h84e8d909153c6a34
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:468:19
36: 0x55acc17a1c74 - std::panic::catch_unwind::ha07a70865c5cf819
at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panic.rs:142:14
37: 0x55acc17a1c74 - tokio::runtime::task::harness::poll_future::h6242a51ce5628d88
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/harness.rs:473:18
38: 0x55acc17a1c74 - tokio::runtime::task::harness::Harness<T,S>::poll_inner::he2e75c7551c16150
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/harness.rs:208:27
39: 0x55acc17a1c74 - tokio::runtime::task::harness::Harness<T,S>::poll::h160dc344962ad018
at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/harness.rs:153:15
{"timestamp":"2024-06-12T07:53:01.887457Z","level":"ERROR","fields":{"message":"Webserver Crashed"},"target":"lorax_launcher"}
{"timestamp":"2024-06-12T07:53:01.887497Z","level":"INFO","fields":{"message":"Shutting down shards"},"target":"lorax_launcher"}
{"timestamp":"2024-06-12T07:53:02.192889Z","level":"INFO","fields":{"message":"Shard terminated"},"target":"lorax_launcher","span":{"rank":0,"name":"shard-manager"},"spans":[{"rank":0,"name":"shard-manager"}]}
Error: WebserverFailed
Information
- [X] Docker
- [ ] The CLI directly
Tasks
- [ ] An officially supported command
- [ ] My own modifications
Reproduction
Start the server with Docker using the model microsoft/Phi-3-mini-128k-instruct
Expected behavior
The server can serve many concurrent users at the same time
Note: This version still works well
I can confirm this issue is happening. It could go unnoticed if the Docker container is set to restart unless stopped.
Is this still an issue? We have had to revert to older containers due to instability issues. It is not clear that the problem is on the Lorax side, but it could be.