stevenarella icon indicating copy to clipboard operation
stevenarella copied to clipboard

thread '<unknown>' has overflowed its stack, fatal runtime error: stack overflow, __rust_probestack from reload_server_list

Open iceiix opened this issue 3 years ago • 4 comments

When running a debug build with no optimizations (that is, with opt-level = 1 removed from Cargo.toml), a crash occurs in __rust_probestack, reached via reload_server_list -> read_packet -> packet_by_id, indicating a stack overflow:

% cargo run
[blocking/client.rs:753][TRACE] closed runtime thread (ThreadId(15))

thread '<unknown>' has overflowed its stack
fatal runtime error: stack overflow
zsh: abort      cargo run

% lldb target/debug/stevenarella -o run
Process 4174 stopped
* thread #20, stop reason = EXC_BAD_ACCESS (code=2, address=0x70000d757a98)
    frame #0: 0x000000010117e157 stevenarella`__rust_probestack + 23
stevenarella`__rust_probestack:
->  0x10117e157 <+23>: testq  %rsp, 0x8(%rsp)
    0x10117e15c <+28>: subq   $0x1000, %r11             ; imm = 0x1000 
    0x10117e163 <+35>: cmpq   $0x1000, %r11             ; imm = 0x1000 
    0x10117e16a <+42>: ja     0x10117e150               ; <+16>
Target 0: (stevenarella) stopped.

Process 4174 launched: 'target/debug/stevenarella' (x86_64)
(lldb) bt
error: need to add support for DW_TAG_base_type '()' encoded with DW_ATE = 0x7, bit_size = 0
error: need to add support for DW_TAG_base_type '()' encoded with DW_ATE = 0x7, bit_size = 0
* thread #20, stop reason = EXC_BAD_ACCESS (code=2, address=0x70000d757a98)
  * frame #0: 0x000000010117e157 stevenarella`__rust_probestack + 23
    frame #1: 0x0000000100b19f8e stevenarella`steven_protocol::protocol::packet::packet_by_id::h4978885aa0193b70(version=0, state=<unavailable>, dir=<unavailable>, id=0, buf=<unavailable>) at mod.rs:140
    frame #2: 0x0000000100a66b5d stevenarella`steven_protocol::protocol::Conn::read_packet::ha802bac8d24a7eaa(self=0x000070000d9530f8) at mod.rs:1148:22
    frame #3: 0x0000000100a67b00 stevenarella`steven_protocol::protocol::Conn::do_status::h80fad33fbbfaec98(self=Conn @ 0x000070000d9530f8) at mod.rs:1200:59
    frame #4: 0x000000010027948d stevenarella`stevenarella::screen::server_list::ServerList::reload_server_list::_$u7b$$u7b$closure$u7d$$u7d$::_$u7b$$u7b$closure$u7d$$u7d$::h44c8b89134f91a91((null)=closure-0 @ 0x000070000d953d98, conn=<unavailable>) at server_list.rs:271:38
    frame #5: 0x000000010026216d stevenarella`core::result::Result$LT$T$C$E$GT$::and_then::h9750178b9c146fc8(self=Result<steven_protocol::protocol::Conn, steven_protocol::protocol::Error> @ 0x000070000d956648, op=closure-0 @ 0x000070000d956498) at result.rs:708:22
    frame #6: 0x000000010027955a stevenarella`stevenarella::screen::server_list::ServerList::reload_server_list::_$u7b$$u7b$closure$u7d$$u7d$::hc616dd881b588187 at server_list.rs:270:23
    frame #7: 0x000000010003a0c1 stevenarella`std::sys_common::backtrace::__rust_begin_short_backtrace::hee984852b4d275b5(f=<unavailable>) at backtrace.rs:137:18
    frame #8: 0x00000001002775a1 stevenarella`std::thread::Builder::spawn_unchecked::_$u7b$$u7b$closure$u7d$$u7d$::_$u7b$$u7b$closure$u7d$$u7d$::hb58890e5228a4a2c at mod.rs:464:17
    frame #9: 0x000000010004b111 stevenarella`_$LT$std..panic..AssertUnwindSafe$LT$F$GT$$u20$as$u20$core..ops..function..FnOnce$LT$$LP$$RP$$GT$$GT$::call_once::hf944feb85a12545d(self=<unavailable>, _args=<unavailable>) at panic.rs:308:9
    frame #10: 0x0000000100148509 stevenarella`std::panicking::try::do_call::h4d35e0219f41f9de(data="\x90UO\x03\x01") at panicking.rs:381:40
    frame #11: 0x0000000100148bcd stevenarella`__rust_try + 29
    frame #12: 0x00000001001480fe stevenarella`std::panicking::try::h87bf4f691700d6c0(f=<unavailable>) at panicking.rs:345:19
    frame #13: 0x000000010004b2b1 stevenarella`std::panic::catch_unwind::hc6c9d8434ee244da(f=<unavailable>) at panic.rs:382:14
    frame #14: 0x0000000100276861 stevenarella`std::thread::Builder::spawn_unchecked::_$u7b$$u7b$closure$u7d$$u7d$::h63bdad4e7a2dd187 at mod.rs:463:30
    frame #15: 0x00000001000c0381 stevenarella`core::ops::function::FnOnce::call_once$u7b$$u7b$vtable.shim$u7d$$u7d$::h68f44f53c40952df((null)=0x00000001034f69b0, (null)=<unavailable>) at function.rs:227:5
    frame #16: 0x000000010115ce2d stevenarella`std::sys::unix::thread::Thread::new::thread_start::he3e6719579180a65 [inlined] _$LT$alloc..boxed..Box$LT$F$GT$$u20$as$u20$core..ops..function..FnOnce$LT$A$GT$$GT$::call_once::h761c0c24cc66dea8 at boxed.rs:1042:9 [opt]
    frame #17: 0x000000010115ce27 stevenarella`std::sys::unix::thread::Thread::new::thread_start::he3e6719579180a65 [inlined] _$LT$alloc..boxed..Box$LT$F$GT$$u20$as$u20$core..ops..function..FnOnce$LT$A$GT$$GT$::call_once::h15cdc23ec4ed7bf4 at boxed.rs:1042 [opt]
    frame #18: 0x000000010115ce1e stevenarella`std::sys::unix::thread::Thread::new::thread_start::he3e6719579180a65 at thread.rs:87 [opt]
    frame #19: 0x00007fff2031a950 libsystem_pthread.dylib`_pthread_start + 224
    frame #20: 0x00007fff2031647b libsystem_pthread.dylib`thread_start + 15
(lldb) 

I originally believed this was caused by #262 (glow), but it can easily be reproduced on the master branch just by removing opt-level = 1 from profile.dev. This overflow may have been present for a while. More diagnostics:

https://github.com/iceiix/stevenarella/pull/262#issuecomment-748741080 (first detected); https://github.com/iceiix/stevenarella/pull/262#issuecomment-749268243 (tracing); https://github.com/iceiix/stevenarella/pull/262#issuecomment-749314528 (src/screen/server_list.rs); https://github.com/iceiix/stevenarella/pull/262#issuecomment-751167303 (more debugging)

iceiix avatar Dec 25 '20 16:12 iceiix

Since it only occurs with unoptimized builds, maybe the stack really is too deep. What is taking up the space, and can it be reduced? Or can the stack size be increased? https://stackoverflow.com/questions/44003589/how-to-increase-the-stack-size-available-to-a-rust-library

iceiix avatar Dec 25 '20 19:12 iceiix

reload_server_list isn't necessary to reproduce this issue; the only thing you need is Conn.do_status (or even read_packet itself, but I haven't tested that). It seems to occur even in a project with 6 lines of code.

use steven_protocol::protocol::Conn;

#[tokio::main]
async fn main() {
	Conn::new("someserver").unwrap().do_status().unwrap();
}

thatbakamono avatar Dec 25 '20 20:12 thatbakamono

Hmm, I tried that minimal repro but am not seeing the crash. I edited the code as follows:

diff --git a/Cargo.toml b/Cargo.toml
index 1768be5..6462fc1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,9 +15,10 @@ path = "src/main.rs"
 # Steven runs horrendously slow with no optimizations, and often freezes.
 # However, building with full -O3 optimizations takes too long for a debug build.
 # Use an -O1 optimization level strikes a good compromise between build and program performance.
-opt-level = 1
+#opt-level = 1
 
 [dependencies]
+tokio = { version = "1.0.1", features = ["rt-multi-thread", "macros"] }
 cfg-if = "1.0.0"
 wasm-bindgen = "0.2.69"
 winit = { version = "0.24.0", features = [ "web-sys" ]}
diff --git a/src/main.rs b/src/main.rs
index 0788855..fe3386d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -214,6 +214,7 @@ cfg_if! {
     }
 }
 
+/*
 cfg_if! {
     if #[cfg(target_arch = "wasm32")] {
         use wasm_bindgen::prelude::*;
@@ -225,6 +226,14 @@ cfg_if! {
         pub fn main() { main2(); }
     }
 }
+*/
+
+use steven_protocol::protocol::Conn;
+
+#[tokio::main]
+async fn main() {
+       Conn::new("localhost", 754).unwrap().do_status().unwrap();
+}
 
 fn main2() {
     let opt = Opt::from_args();

then cargo run completes without errors:

    Finished dev [unoptimized + debuginfo] target(s) in 0.16s
     Running `target/debug/stevenarella`

The same happens without async: no crash here... Maybe I'm missing something. (I also tried tokio = "0.2", to match the version used by hyper 0.13.9, which is used by reqwest 0.10.10.)

iceiix avatar Dec 28 '20 02:12 iceiix

I'm not sure why it didn't happen; maybe the server sent too little data to overflow the stack? Try with Hypixel and see if it happens.

thatbakamono avatar Jan 01 '21 12:01 thatbakamono