Odin
Odin copied to clipboard
Occational miscompile of `(matrix[2,2]f32)(f32_value)`
Context
Odin: dev-2022-09-nightly:e7fb2cf7 OS: Windows 10 Home Basic (version: 21H1), build 19043.2006 CPU: AMD Ryzen 7 3700X 8-Core Processor RAM: 32694 MiB
Expected Behavior
The following program should consistently compile and print matrix[1.000, 0.000; 0.000, 1.000]
package mbug
import "core:fmt"
main :: proc() {
l := f32(1.0)
m := (matrix[2,2]f32)(l) // this miscompile
fmt.println(m)
}
Current Behavior
Occationally the compiler would produce a binary that prints incorrect values like
matrix[1.000, -119900579919308946900132763061452.000; 0.000, 1.000]
or matrix[1.000, 0.014; 0.000, 1.000]
The behaviour is non-deterministic, hit odin run mbug.odin -file
multiple times (5-10 times) would trigger at least one miscompile in my experence.
Failure Information (for bugs)
Steps to Reproduce
hit compile multiple times
Failure Logs
C:\projects\testground>odin run mbug.odin -file -debug
matrix[1.000, 0.000; 0.000, 1.000]
C:\projects\testground>odin run mbug.odin -file -debug
matrix[1.000, -0.000; 0.000, 1.000]
C:\projects\testground>odin run mbug.odin -file -debug
matrix[1.000, 0.000; 0.000, 1.000]
C:\projects\testground>odin run mbug.odin -file -debug
matrix[1.000, 0.014; 0.000, 1.000]
C:\projects\testground>odin run mbug.odin -file
matrix[1.000, -119900579919308946900132763061452.000; 0.000, 1.000]
Some notes from reproducing this bug:
- o:size doesn't produce this bug.
- o:minimal wrong result seems random like you said, but also, the compiled .exe run in a debugger, if not miscompiled will consistently produce the correct 0, HOWEVER, if miscompiled, it SOMETIMES produces the correct 0 result, and others times produces the wrong result (on subsequent reruns on the .exe).
- on o:speed, the wrong result is always the same across compilations, also sometimes produces the correct 0.
Still an issue in odin version dev-2023-05-nightly:0c352213
I've looked at the codegen and it seems the zeros in the matrix are simply not initialized, so they take on whatever random value was on the stack. This explains why sometimes it appears to work. Here's an annotated listing for reference.
mbug.main:
; set up stack frame etc
push rbp
sub rsp,B0
lea rbp,qword ptr ss:[rsp+80]
and rsp,FFFFFFFFFFFFFFE0
mov qword ptr ss:[rsp+28],rcx
mov r8,qword ptr ss:[rsp+28]
; load 1.0 into xmm0
mov dword ptr ss:[rsp+AC],3F800000
movss xmm0,dword ptr ss:[rsp+AC]
; write 1.0 to rsp+80 and rsp+8C, note that rsp+84 and rsp+88 should have been zeroed here
movss dword ptr ss:[rsp+80],xmm0
movss dword ptr ss:[rsp+8C],xmm0
; copy 16 bytes from rsp+80..rsp+90 (this is our matrix) to rsp+60..rsp+70
movss xmm0,dword ptr ss:[rsp+80]
movss xmm1,dword ptr ss:[rsp+84]
movss xmm2,dword ptr ss:[rsp+88]
movss xmm3,dword ptr ss:[rsp+8C]
movss dword ptr ss:[rsp+6C],xmm3
movss dword ptr ss:[rsp+68],xmm2
movss dword ptr ss:[rsp+64],xmm1
movss dword ptr ss:[rsp+60],xmm0
; clear xmm0, it will only be used to zero out blocks of 16 bytes after this
xorps xmm0,xmm0
; zero out rsp+50..rsp+60
movaps xmmword ptr ss:[rsp+50],xmm0
; rax = pointer to matrix, stashed at rsp+50
lea rax,qword ptr ss:[rsp+60]
mov qword ptr ss:[rsp+50],rax
; unrelated store
mov rax,1A00000000000002
mov qword ptr ss:[rsp+58],rax
; rax = pointer to matrix
mov rax,qword ptr ss:[rsp+50]
; unrelated load
mov rcx,qword ptr ss:[rsp+58]
; zero out rsp+30..rsp+50
movaps xmmword ptr ss:[rsp+40],xmm0
movaps xmmword ptr ss:[rsp+30],xmm0
; unrelated store
mov qword ptr ss:[rsp+38],rcx
; stash another pointer to the matrix at rsp+30
mov qword ptr ss:[rsp+30],rax
; rax = pointer to that pointer
lea rax,qword ptr ss:[rsp+30]
; stash that at rsp+40
mov qword ptr ss:[rsp+40],rax
; unrelated store
mov qword ptr ss:[rsp+48],1
; rcx = pointer-to-pointer-to-matrix
lea rcx,qword ptr ss:[rsp+40]
lea rdx,qword ptr ds:[7FF6E86794D8]
; (win x64 convention) rcx passed as first parameter here
call <mbug.fmt.println>
nop
; exit function
lea rsp,qword ptr ss:[rbp+30]
pop rbp
ret