wazero icon indicating copy to clipboard operation
wazero copied to clipboard

Binary built with simd128 works with arm64 but fails with amd64

Open anuraaga opened this issue 9 months ago • 11 comments

Describe the bug A clear and concise description of what the bug is.

I am compiling aho-corasick which has some code special-cased for wasm with simd. If I build with simd enabled, test cases that previously worked on both amd64 and arm64 fail on amd64, arm64 passes. This is a difference in behavior, not a crash.

This does not seem to be newly introduced with optimizing compiler.

To Reproduce

git clone https://github.com/anuraaga/go-aho-corasick.git --branch enablesimd
cd go-aho-corasick
GOARCH=amd64 go test ./...

From https://github.com/wasilibs/go-aho-corasick/pull/34

Expected behavior

Test passes

Environment (please complete the relevant information):

  • Go version: [e.g. 1.19.1] 1.22.2
  • wazero Version: 1.7.1 (also checked with 1.6.0, same behavior)
  • Host architecture: amd64 (reproduces with Rosetta on M1 Mac)
  • Runtime mode: compiler (interpreter works fine)

Additional context

Unfortunately this is not a crash, just the result is wrong. So I don't know if it's feasible to actually debug this 😓 But posting to at least track. No threads or anything, so at least the behavior is deterministic I think.

anuraaga avatar Apr 30 '24 03:04 anuraaga

Just to confirm, interpreter passes the test, right?

ncruces avatar Apr 30 '24 04:04 ncruces

Yup, interpreter works fine, so only amd64 compiler is failing.

anuraaga avatar Apr 30 '24 04:04 anuraaga

just went ahead and run the unit tests in the original Rust aho-corasick repo, and all passed 🤷‍♂️

mathetake avatar Apr 30 '24 07:04 mathetake

well, I reproduced the error with the official unit tests with --release flag in cargo test --no-run

	.__rust_start_panic(i32,i32) i32
		0xa5a99: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/panic_abort/src/lib.rs:96:17 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/panic_abort/src/lib.rs:38:5
	.rust_panic(i32,i32)
		0xa5682: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:833:25
	._ZN3std9panicking20rust_panic_with_hook17h173ea7d6a1f035f0E(i32,i32,i32,i32,i32,i32)
		0xa55b5: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:803:5
	._ZN3std9panicking19begin_panic_handler28_$u7b$$u7b$closure$u7d$$u7d$17h62f35ba2527ec452E(i32)
		0xa44cf: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:659:13
	._ZN3std10sys_common9backtrace26__rust_end_short_backtrace17hdd245b5cf43b7e94E(i32)
		0xa43fa: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/sys_common/backtrace.rs:171:18
	.rust_begin_unwind(i32)
		0xa4f8c: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:647:5
	._ZN4core9panicking9panic_fmt17hb516fa06bb502322E(i32,i32)
		0xa7a94: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panicking.rs:72:14
	._ZN4core9panicking19assert_failed_inner17h7e440e1e2afa7d60E(i32,i32,i32,i32,i32,i32,i32)
		0xae785: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panicking.rs:337:23
	._ZN4core9panicking13assert_failed17h13c7c442512e8f39E(i32,i32,i32,i32,i32)
		0x5a707: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panicking.rs:297:5
	._ZN12aho_corasick5tests16run_search_tests17hf1210027171a9317E(i32,i32)
		0xf5df: /Users/mathetake/aho-corasick/src/tests.rs:1338:13
	._ZN4core3ops8function6FnOnce9call_once17hc2522d0bf33404b3E(i32)
		0x53367: /Users/mathetake/aho-corasick/src/tests.rs:709:13 (inlined)
		         /Users/mathetake/aho-corasick/src/tests.rs:708:19 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:250:5
	._ZN4test28__rust_begin_short_backtrace17h59a53de8fec48743E(i32,i32)
		0x96379: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:250:5 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:621:18
	._ZN4test5types12RunnableTest3run17h75e43b06fc9be44fE(i32,i32)
		0x96196: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/types.rs:146:40
	... maybe followed by omitted frames
	._ZN4test8run_test17h2117ed746d4fbceeE(i32,i32,i32,i32,i32,i32,i32,i32)
		0x7ac32: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:644:60 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/panic/unwind_safe.rs:272:9 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:554:40 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:518:19 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panic.rs:142:14 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:644:27 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:567:43 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:606:17
	._ZN4test7console17run_tests_console17hda31b7a2d1edfabfE(i32,i32,i32)
		0x75d51: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:380:31 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/console.rs:329:5
	._ZN4test9test_main17h38f333ea3a200455E(i32,i32,i32,i32,i32)
		0x965a9: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:143:15
	._ZN4test16test_main_static17h574ee807b9b4a692E(i32,i32)
		0x971c3: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/test/src/lib.rs:162:5
	._ZN12aho_corasick4main17h4f307d5683b5a88bE()
		0x56134: /Users/mathetake/aho-corasick/src/lib.rs:1:1
	._ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf6cc62b06f47227eE(i32)
		0x5cf86: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:250:5 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/sys_common/backtrace.rs:155:18
	._ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h98dc82c73e380a56E.llvm.17769364131994765198(i32) i32
		0x5a30e: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/rt.rs:166:18
	._ZN3std2rt19lang_start_internal17h9dd0d30024c12b69E(i32,i32,i32,i32,i32) i32
		0x9e4e9: /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/core/src/ops/function.rs:284:13 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:554:40 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:518:19 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panic.rs:142:14 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/rt.rs:148:48 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:554:40 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panicking.rs:518:19 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/panic.rs:142:14 (inlined)
		         /rustc/ef71f1047e04438181d7cb925a833e2ada6ab390/library/std/src/rt.rs:148:20
	.__main_void() i32
	._start()```

mathetake avatar Apr 30 '24 07:04 mathetake

just went ahead and run the unit tests in the original Rust aho-corasick repo, and all passed

Oh do you mean you ran the original unit tests compiling to wasm and using wazero? ~~Do you have a command or tip for how to run it?~~ Ah --no-run, cool tip.

anuraaga avatar Apr 30 '24 07:04 anuraaga

image

mathetake avatar Apr 30 '24 07:04 mathetake

managed to get the smaller reproducer like 200kb (original a few mb)

mathetake avatar May 01 '24 09:05 mathetake

still wasm-tools shrink is working hard for me 😉

mathetake avatar May 01 '24 09:05 mathetake

reduced to 40kb, almost there

mathetake avatar May 03 '24 09:05 mathetake

ok 4kb 👯

mathetake avatar May 03 '24 11:05 mathetake

finally got small enough repro

(module
  (type (;0;) (func (param i32 i32 i32)))
  (type (;1;) (func (param i32 i32 i32) (result i32)))
  (type (;2;) (func (param i32 i32 i32 i32)))
  (type (;3;) (func (param i32 i32)))
  (type (;4;) (func (param i32)))
  (func (;0;) (type 3) (param i32 i32)
    (local i32)
    i32.const 13
    local.set 2
    i32.const 1
    i64.const 1
    i64.store offset=16 align=4
    local.get 2
    local.get 2
    i32.store16 offset=52
    local.get 2
    call 3
    local.get 2
    local.get 2
    i64.load align=4
    i64.store offset=88
    i32.const 0
    local.get 2
    i32.load offset=88
    i32.load offset=88
    unreachable
  )
  (func (;1;) (type 2) (param i32 i32 i32 i32)
    i32.const 0
    i32.const -701700
    i32.store offset=8
    i32.const 1
    i32.const 1
    i32.store offset=8
  )
  (func (;2;) (type 0) (param i32 i32 i32)
    (local i32 i32 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i64 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
    v128.const i32x4 0x53525150 0x57565554 0x5b5a5958 0x5f5e5d5c
    local.set 10
    v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000
    local.set 20
    i32.const 0
    i32.load offset=8
    local.set 22
    local.get 22
    local.set 1
    i32.const 0
    local.set 28
    local.get 1
    local.tee 30
    local.get 30
    local.get 30
    local.get 30
    call 4
    local.set 31
    i32.const 0
    local.get 19
    v128.store align=4
    i32.const 1
    local.get 18
    v128.store align=4
    i32.const 0
    local.get 17
    v128.store align=4
    local.get 0
    local.get 16
    v128.store align=4
    i32.const 1
    local.get 15
    v128.store align=4
    i32.const 1
    local.get 14
    v128.store align=4
    i32.const 1
    local.get 10
    v128.store align=4
    i32.const 0
    local.get 9
    v128.store align=4
    i32.const 0
    local.get 7
    v128.store align=4
    drop
  )
  (func (;3;) (type 4) (param i32)
    local.get 0
    local.get 0
    local.get 0
    local.get 0
    call 1
    i32.const 1
    i32.const 1
    i32.const 0
    call 2
  )
  (func (;4;) (type 1) (param i32 i32 i32) (result i32)
    i32.const 1
    i32.const 0
    local.get 2
    memory.fill
    i32.const 1
    return
  )
  (memory (;0;) 19)
  (export "" (func 0))
)

mathetake avatar May 03 '24 12:05 mathetake

I won't be able to debug this, so in case anyone try to fix, see https://github.com/tetratelabs/wazero/pull/2201 for reproducing the bug

mathetake avatar May 05 '24 05:05 mathetake