wazero
wazero copied to clipboard
Binary built with simd128 works with arm64 but fails with amd64
Describe the bug A clear and concise description of what the bug is.
I am compiling aho-corasick which has some code special-cased for wasm with simd. If I build with simd enabled, test cases that previously worked on both amd64 and arm64 fail on amd64, arm64 passes. This is a difference in behavior, not a crash.
This does not seem to be newly introduced with optimizing compiler.
To Reproduce
git clone https://github.com/anuraaga/go-aho-corasick.git --branch enablesimd
cd go-aho-corasick
GOARCH=amd64 go test ./...
From https://github.com/wasilibs/go-aho-corasick/pull/34
Expected behavior
Test passes
Environment (please complete the relevant information):
- Go version: [e.g. 1.19.1] 1.22.2
- wazero Version: 1.7.1 (also checked with 1.6.0, same behavior)
- Host architecture: amd64 (reproduces with Rosetta on M1 Mac)
- Runtime mode: compiler (interpreter works fine)
Additional context
Unfortunately this is not a crash, just the result is wrong. So I don't know if it's feasible to actually debug this 😓 But posting to at least track. No threads or anything, so at least the behavior is deterministic I think.
Just to confirm, interpreter passes the test, right?
Yup, interpreter works fine, so only amd64 compiler is failing.
just went ahead and run the unit tests in the original Rust aho-corasick repo, and all passed 🤷♂️
well, I reproduced the error with the official unit tests with --release
flag in cargo test --no-run
.__rust_start_panic(i32,i32) i32
0xa5a99: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/panic_abort/src/lib.rs:96:17 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/panic_abort/src/lib.rs:38:5
.rust_panic(i32,i32)
0xa5682: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:833:25
._ZN3std9panicking20rust_panic_with_hook17h173ea7d6a1f035f0E(i32,i32,i32,i32,i32,i32)
0xa55b5: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:803:5
._ZN3std9panicking19begin_panic_handler28_$u7b$$u7b$closure$u7d$$u7d$17h62f35ba2527ec452E(i32)
0xa44cf: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:659:13
._ZN3std10sys_common9backtrace26__rust_end_short_backtrace17hdd245b5cf43b7e94E(i32)
0xa43fa: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/sys_common/backtrace.rs:171:18
.rust_begin_unwind(i32)
0xa4f8c: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:647:5
._ZN4core9panicking9panic_fmt17hb516fa06bb502322E(i32,i32)
0xa7a94: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panicking.rs:72:14
._ZN4core9panicking19assert_failed_inner17h7e440e1e2afa7d60E(i32,i32,i32,i32,i32,i32,i32)
0xae785: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panicking.rs:337:23
._ZN4core9panicking13assert_failed17h13c7c442512e8f39E(i32,i32,i32,i32,i32)
0x5a707: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panicking.rs:297:5
._ZN12aho_corasick5tests16run_search_tests17hf1210027171a9317E(i32,i32)
0xf5df: /Users/mathetake/aho-corasick/src/tests.rs:1338:13
._ZN4core3ops8function6FnOnce9call_once17hc2522d0bf33404b3E(i32)
0x53367: /Users/mathetake/aho-corasick/src/tests.rs:709:13 (inlined)
/Users/mathetake/aho-corasick/src/tests.rs:708:19 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:250:5
._ZN4test28__rust_begin_short_backtrace17h59a53de8fec48743E(i32,i32)
0x96379: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:250:5 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:621:18
._ZN4test5types12RunnableTest3run17h75e43b06fc9be44fE(i32,i32)
0x96196: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/types.rs:146:40
... maybe followed by omitted frames
._ZN4test8run_test17h2117ed746d4fbceeE(i32,i32,i32,i32,i32,i32,i32,i32)
0x7ac32: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:644:60 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panic/unwind_safe.rs:272:9 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:554:40 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:518:19 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panic.rs:142:14 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:644:27 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:567:43 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:606:17
._ZN4test7console17run_tests_console17hda31b7a2d1edfabfE(i32,i32,i32)
0x75d51: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:380:31 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/console.rs:329:5
._ZN4test9test_main17h38f333ea3a200455E(i32,i32,i32,i32,i32)
0x965a9: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:143:15
._ZN4test16test_main_static17h574ee807b9b4a692E(i32,i32)
0x971c3: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:162:5
._ZN12aho_corasick4main17h4f307d5683b5a88bE()
0x56134: /Users/mathetake/aho-corasick/src/lib.rs:1:1
._ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf6cc62b06f47227eE(i32)
0x5cf86: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:250:5 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/sys_common/backtrace.rs:155:18
._ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h98dc82c73e380a56E.llvm.17769364131994765198(i32) i32
0x5a30e: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/rt.rs:166:18
._ZN3std2rt19lang_start_internal17h9dd0d30024c12b69E(i32,i32,i32,i32,i32) i32
0x9e4e9: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:284:13 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:554:40 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:518:19 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panic.rs:142:14 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/rt.rs:148:48 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:554:40 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:518:19 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panic.rs:142:14 (inlined)
/rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/rt.rs:148:20
.__main_void() i32
._start()```
just went ahead and run the unit tests in the original Rust aho-corasick repo, and all passed
Oh do you mean you ran the original unit tests compiling to wasm and using wazero? ~~Do you have a command or tip for how to run it?~~ Ah --no-run
, cool tip.
managed to get the smaller reproducer like 200kb (original a few mb)
still wasm-tools shrink is working hard for me 😉
reduced to 40kb, almost there
ok 4kb 👯
finally got small enough repro
(module
(type (;0;) (func (param i32 i32 i32)))
(type (;1;) (func (param i32 i32 i32) (result i32)))
(type (;2;) (func (param i32 i32 i32 i32)))
(type (;3;) (func (param i32 i32)))
(type (;4;) (func (param i32)))
(func (;0;) (type 3) (param i32 i32)
(local i32)
i32.const 13
local.set 2
i32.const 1
i64.const 1
i64.store offset=16 align=4
local.get 2
local.get 2
i32.store16 offset=52
local.get 2
call 3
local.get 2
local.get 2
i64.load align=4
i64.store offset=88
i32.const 0
local.get 2
i32.load offset=88
i32.load offset=88
unreachable
)
(func (;1;) (type 2) (param i32 i32 i32 i32)
i32.const 0
i32.const -701700
i32.store offset=8
i32.const 1
i32.const 1
i32.store offset=8
)
(func (;2;) (type 0) (param i32 i32 i32)
(local i32 i32 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i64 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
v128.const i32x4 0x53525150 0x57565554 0x5b5a5958 0x5f5e5d5c
local.set 10
v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000
local.set 20
i32.const 0
i32.load offset=8
local.set 22
local.get 22
local.set 1
i32.const 0
local.set 28
local.get 1
local.tee 30
local.get 30
local.get 30
local.get 30
call 4
local.set 31
i32.const 0
local.get 19
v128.store align=4
i32.const 1
local.get 18
v128.store align=4
i32.const 0
local.get 17
v128.store align=4
local.get 0
local.get 16
v128.store align=4
i32.const 1
local.get 15
v128.store align=4
i32.const 1
local.get 14
v128.store align=4
i32.const 1
local.get 10
v128.store align=4
i32.const 0
local.get 9
v128.store align=4
i32.const 0
local.get 7
v128.store align=4
drop
)
(func (;3;) (type 4) (param i32)
local.get 0
local.get 0
local.get 0
local.get 0
call 1
i32.const 1
i32.const 1
i32.const 0
call 2
)
(func (;4;) (type 1) (param i32 i32 i32) (result i32)
i32.const 1
i32.const 0
local.get 2
memory.fill
i32.const 1
return
)
(memory (;0;) 19)
(export "" (func 0))
)
I won't be able to debug this, so in case anyone try to fix, see https://github.com/tetratelabs/wazero/pull/2201 for reproducing the bug