Simple swap statement assigning incorrect value
Context
Please provide any relevant information about your setup. This is important in case the issue is not reproducible except for under certain conditions.
- Operating System & Odin Version: Odin: dev-2022-07:c9c3611b OS: Windows 11 Home Basic (version: 21H2), build 22000.795 CPU: 11th Gen Intel(R) Core(TM) i7-11370H @ 3.30GHz RAM: 32602 MiB
Expected Behavior
The values are swapped
Current Behavior
Both values are assigned with the left value.
Failure Information (for bugs)
Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template.
Steps to Reproduce
Please provide detailed steps for reproducing the issue.
I could not make a minimal repro at the moment. It might be that once a struct is a certain size that it reproduces? I'm not sure.
Failure Logs
e.g.
log.info("Before swap new_input:", new_app_input)
log.info("Before swap last_input:", last_app_input)
new_app_input, last_app_input = last_app_input, new_app_input
log.info("After swap new_input:", new_app_input)
log.info("After swap last_input:", last_app_input)
Results in: [INFO ] --- [2022-07-26 17:04:10] [win32_sol.odin:1138:main()] Before swap new_input: Input{frame_dt = 0.017, gained_focus = false, text = [], mouse_pos = [2032, 1395], mouse_d = [30, -22], mouse_wheel_scroll = 0, keys = [.Mouse_Left = Key_State{pressed = false, half_transitions = 0}, [MORE DATA]
[INFO ] --- [2022-07-26 17:04:10] [win32_sol.odin:1139:main()] Before swap last_input: Input{frame_dt = 0.000, gained_focus = false, text = [], mouse_pos = [0, 0], mouse_d = [0, 0], mouse_wheel_scroll = 0, keys = [.Mouse_Left = Key_State{pressed = false, half_transitions = 0}, [MORE DATA]
[INFO ] --- [2022-07-26 17:04:10] [win32_sol.odin:1141:main()] After swap new_input: Input{frame_dt = 0.000, gained_focus = false, text = [], mouse_pos = [0, 0], mouse_d = [0, 0], mouse_wheel_scroll = 0, keys = [.Mouse_Left = Key_State{pressed = false, half_transitions = 0}, [MORE DATA]
[INFO ] --- [2022-07-26 17:04:10] [win32_sol.odin:1142:main()] After swap last_input: Input{frame_dt = 0.000, gained_focus = false, text = [], mouse_pos = [0, 0], mouse_d = [0, 0], mouse_wheel_scroll = 0, keys = [.Mouse_Left = Key_State{pressed = false, half_transitions = 0}, [MORE DATA]
As you can see, after the swap both of the struct have the data of last_app_input before the assignment....
Found a minimal repro. Seems to happen once the struct is a certain size. Everything gets zeroed out in the swap statement:
package main
import "core:fmt"
Foo :: struct {
a: f32,
b: f32,
c: [256]i64
}
main :: proc() {
new_foo, prev_foo: Foo
loop: for {
new_foo = {}
fmt.println("New foo top of loop:", new_foo)
fmt.println("Prev foo top of loop:", prev_foo)
new_foo.a = 256.0
new_foo.b = 128.0
fmt.println("New foo before swap:", new_foo)
fmt.println("Prev foo before swap:", prev_foo)
new_foo, prev_foo = prev_foo, new_foo
fmt.println("New foo after swap:", new_foo)
fmt.println("Prev foo after swap:", prev_foo)
}
}
Result:
New foo before swap: Foo{a = 256.000, b = 128.000, c = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
Prev foo before swap: Foo{a = 0.000, b = 0.000, c = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
New foo after swap: Foo{a = 0.000, b = 0.000, c = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
Prev foo after swap: Foo{a = 0.000, b = 0.000, c = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
This is the offending commit: https://github.com/odin-lang/Odin/commit/4d06a54c0ce7266d8f30c0f6b10dd527c3f9d8c6
The code works as expected before this one.
I can confirm this bug. The builtin swap does not work with bigger structs.
Here is my minimal repo:
Foo :: struct #align 64 { n: int } //use align to grow size (tested with 64 and 128)
a := Foo { 2 }
b := Foo { 0 }
fmt.print(a)
a, b = b, a
fmt.print(b)
On size 64 bytes: Foo{n = 2}Foo{n = 2} (works as expected)
On size 128 bytes: Foo{n = 2}Foo{n = 0} (bug, values are zeroed)
tested on dev-2022-11:382bd876 and Linux
I ran into this bug the other day and did some more thorough investigation on it. Output of odin report on my machine:
Odin: dev-2022-12-nightly:521ed286
OS: Windows 10 Home Basic (version: 21H2), build 19044.2364
CPU: Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz
RAM: 16285 MiB
Here's the test program I used:
package swap_assignment_bug
check_result :: #force_no_inline proc(a: $T, b: T) {
assert(a.x[0] == 0xCD && b.x[0] == 0xAB)
}
swap_assign_n :: #force_no_inline proc ($N: uint) {
a, b : struct {
x: [N]u8,
}
a.x[0] = 0xAB
b.x[0] = 0xCD
a, b = b, a
check_result(a, b)
}
main :: #force_no_inline proc() {
swap_assign_n(64) // Succeeds
swap_assign_n(65) // Fails
}
The #force_no_inline tags are to make the disassembly easier to read. The bug manifests with or without them. When the structure size is increased from 64 to 65 bytes, Odin generates LLVM code using slightly different operations. Here's a pared-down version of this code (obtained using Odin's -build-mode:llvm flag):
; Function Attrs: noinline
define internal void @swap_assignment_bug.swap_assign_n-2882(i8* noalias nocapture nonnull %__.context_ptr) #3 {
decls:
%0 = alloca { [64 x i8] }, align 1
%1 = alloca { [64 x i8] }, align 1
br label %entry
entry: ; preds = %decls
%2 = bitcast { [64 x i8] }* %0 to i8*
call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 64, i1 false)
%3 = bitcast { [64 x i8] }* %1 to i8*
call void @llvm.memset.p0i8.i64(i8* %3, i8 0, i64 64, i1 false)
%4 = getelementptr inbounds { [64 x i8] }, { [64 x i8] }* %0, i32 0, i32 0
%5 = getelementptr [64 x i8], [64 x i8]* %4, i64 0, i64 0
store i8 -85, i8* %5, align 1
%6 = getelementptr inbounds { [64 x i8] }, { [64 x i8] }* %1, i32 0, i32 0
%7 = getelementptr [64 x i8], [64 x i8]* %6, i64 0, i64 0
store i8 -51, i8* %7, align 1
%8 = load { [64 x i8] }, { [64 x i8] }* %0, align 1
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %2, i8* align 1 %3, i64 64, i1 false)
store { [64 x i8] } %8, { [64 x i8] }* %1, align 1
call void @swap_assignment_bug.check_result-3144({ [64 x i8] }* %0, { [64 x i8] }* %1, i8* %__.context_ptr) #3
ret void
}
; Function Attrs: noinline
define internal void @swap_assignment_bug.swap_assign_n-2885(i8* noalias nocapture nonnull %__.context_ptr) #3 {
decls:
%0 = alloca { [65 x i8] }, align 1
%1 = alloca { [65 x i8] }, align 1
br label %entry
entry: ; preds = %decls
%2 = bitcast { [65 x i8] }* %0 to i8*
call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 65, i1 false)
%3 = bitcast { [65 x i8] }* %1 to i8*
call void @llvm.memset.p0i8.i64(i8* %3, i8 0, i64 65, i1 false)
%4 = getelementptr inbounds { [65 x i8] }, { [65 x i8] }* %0, i32 0, i32 0
%5 = getelementptr [65 x i8], [65 x i8]* %4, i64 0, i64 0
store i8 -85, i8* %5, align 1
%6 = getelementptr inbounds { [65 x i8] }, { [65 x i8] }* %1, i32 0, i32 0
%7 = getelementptr [65 x i8], [65 x i8]* %6, i64 0, i64 0
store i8 -51, i8* %7, align 1
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %2, i8* align 1 %3, i64 65, i1 false)
call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %3, i8* align 1 %3, i64 65, i1 false)
call void @swap_assignment_bug.check_result-3154({ [65 x i8] }* %0, { [65 x i8] }* %1, i8* %__.context_ptr) #3
ret void
}
The main difference between these two versions of swap_assign_n are that the 64 byte version stores into a temporary register %8 prior to doing any copying; while the 65 byte version omits this store and performs a memcpy straight away.