Odin
Odin copied to clipboard
LLVM SIMD code generation bug - "Both operands to ICmp instruction are not of the same type!"
Context
Please provide any relevant information about your setup. This is important in case the issue is not reproducible except for under certain conditions.
- Operating System & Odin Version: Windows 10, version dev-2024-12:597fba7c3
- Please paste `odin report` output:
  Odin: dev-2024-12:597fba7c3
  OS: Windows 10 Professional (version: 20H2), build 19042.1706
  CPU: Intel(R) Core(TM) i7-10700KF CPU @ 3.80GHz
  RAM: 32637 MiB
  Backend: LLVM 18.1.8
Expected Behavior
The proc `GET_ANIM_IDX_SIMD` is compiled correctly, without an LLVM code generation failure.
Current Behavior
The following LLVM error is generated when running `odin run .`:
` LLVM CODE GEN FAILED FOR PROCEDURE: main.GET_ANIM_IDX_SIMD define i32 @main.GET_ANIM_IDX_SIMD(ptr %0, i32 %1, ptr noalias nocapture nonnull %__.context_ptr) { decls: %2 = alloca <4 x i32>, align 16 %3 = alloca i32, align 4 %4 = alloca <4 x i32>, align 16 %INDICES = alloca <4 x i32>, align 16 %GT = alloca <4 x i32>, align 16 %result = alloca i32, align 4 br label %entry
entry: ; preds = %decls call void @llvm.memcpy.inline.p0.p0.i64(ptr %2, ptr %0, i64 16, i1 false) store i32 %1, ptr %3, align 4 store <4 x i32> zeroinitializer, ptr %4, align 16 %5 = insertelement <4 x i32> zeroinitializer, i32 %1, i32 0 %6 = insertelement <4 x i32> %5, i32 %1, i32 1 %7 = insertelement <4 x i32> %6, i32 %1, i32 2 %8 = insertelement <4 x i32> %7, i32 %1, i32 3 store <4 x i32> %8, ptr %4, align 16 %9 = load <4 x i32>, ptr %4, align 16 store <4 x i32> %9, ptr %INDICES, align 16 %10 = load <4 x i32>, ptr %2, align 16 %11 = icmp sgt <4 x i32> %10, i32 %1 %12 = sext <4 x i1> %11 to <4 x i32> store <4 x i32> %12, ptr %GT, align 16 %13 = load <4 x i32>, ptr %GT, align 16 %14 = sub <4 x i32> zeroinitializer, %13 store <4 x i32> %14, ptr %GT, align 16 %15 = load <4 x i32>, ptr %GT, align 16 %16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %15) store i32 %16, ptr %result, align 4 %17 = load i32, ptr %result, align 4 ret i32 %17 }
Both operands to ICmp instruction are not of the same type! %11 = icmp sgt <4 x i32> %10, i32 %1
Steps to Reproduce
Run the code provided below with `odin run .`:
package main;
import "core:fmt";
// Player states used as the index type of HBOX_DURATIONS_UNIVERSAL and as the
// `p_state` field type of _Player_State_1.
// NOTE(review): presumably the states shared by every character (inferred from the name) — confirm.
Universal_Player_States :: enum { _5, _2, _Transition_5_to_2, _Transition_2_to_5, _WALK_F, _WALK_B, _DASH_F, _DASH_B, _5G, _2G, _Transition_5G_to_2G, _Transition_2G_to_5G, _Blockstun_5ing, _Blockstun_2ing, _Hitstun_Stand_Highreel, _Hitstun_Stand_Midgut, _Hitstun_Crouch_Reel, _ThrowBreaking, _Dead, _Ringout, }
// Optimise this to make this a flat array of 5 elements? perhaps 8 for cache alignment? or does just the whole thing need to be cache aligned? // curious optimisation cases
// Enumerated array: one slice of i32 values per Universal_Player_States member.
// Not referenced by any other code visible in this report.
HBOX_DURATIONS_UNIVERSAL :: [Universal_Player_States][]i32{ ._5 = {66, }, ._2 = {255,}, ._Transition_5_to_2 = {3,}, ._Transition_2_to_5 = {3, }, ._WALK_F = {32,}, ._WALK_B = {28,}, ._DASH_F = {16,}, ._DASH_B = {14,}, ._5G = {255,}, ._2G = {255,}, ._Transition_5G_to_2G = {5,}, ._Transition_2G_to_5G = {4,}, ._Blockstun_5ing = {255,}, ._Blockstun_2ing = {255,}, ._Hitstun_Stand_Highreel = {255,}, ._Hitstun_Stand_Midgut = {255,}, ._Hitstun_Crouch_Reel = {255,}, ._ThrowBreaking = {38,}, ._Dead = {35, 155}, ._Ringout = {255,}, }
// Enumerated array: one slice of i32 values per Classic_Player_State member.
// main passes the `._5S` entry to GET_ANIM_IDX_ORIGINAL, which compares a state counter
// against each value in order.
// NOTE(review): the values look like ascending thresholds for the state counter — confirm units/meaning.
HBOX_DURATIONS_CLASSIC :: [Classic_Player_State][]i32{ ._5S = {11, 14, 18, 27,}, ._2S = {11, 13, 17, 33,}, ._6S = {13, 15, 26, 32, 36,}, ._3S = {14, 16, 20, 42,}, ._8S = {7, 19, 21, 27, 42,}, ._5Throw = {9, 14, 33,}, ._5Throw_Hitting = {12, 14, 36}, ._4Throw = {9, 14, 33,}, ._4Throw_Hitting = {24, 34, 45, 52,}, }
// Type of the `character` field in Player_Data.
Character :: enum { Classic, Wrassler, Striker, CounterMan, }
// Index type of HBOX_DURATIONS_CLASSIC and `c_state` field type of _Player_State_2.
Classic_Player_State :: enum { _5S, _2S, _6S, _3S, _8S, _5Throw, _5Throw_Hitting, _4Throw, _4Throw_Hitting, }
// `c_state` field type of _Player_State_3.
Wrassler_Player_State :: enum { _5S, _2S, _6S, _3S, _4S, _5Throw, _5Throw_Hitting, _4Throw, _4Throw_Hitting, _6Throw, _6Throw_Hitting, }
// `c_state` field type of _Player_State_4.
Striker_Player_State :: enum { _5S, _2S, _6S, _6Sstr_S, _3S, _1S, _5Throw, _5Throw_Hitting, _6Throw, _6Throw_Hitting, }
// `c_state` field type of _Player_State_5.
CounterMan_Player_State :: enum { _5S, _2S, _6S, _8S, _3S, _1S, _5Throw, _5Throw_Hitting, _6Throw, _6Throw_Hitting, _4Throw, _4Throw_Hitting, }
// Type of the 1-bit `which` field in every _Player_State_N bit_field.
// NOTE(review): presumably selects between the universal and the character-specific state — confirm.
Which_Player_State :: enum { _Universal, _CharSpecific, }
// Problem: Need to use an enum for which character they are using anyway // so which should just be 1 bit 61 thingo anyway
// Each _Player_State_N is a u64-backed bit_field with a 1-bit `which` field followed by a
// 63-bit state field; only the enum type of the state field differs between them.
// Universal variant: the state field is named `p_state`.
_Player_State_1 :: bit_field u64 { which: Which_Player_State | 1, p_state: Universal_Player_States | 63, }
// Classic variant: the state field is named `c_state`.
_Player_State_2 :: bit_field u64 { which: Which_Player_State | 1, c_state: Classic_Player_State | 63, }
// Wrassler variant.
_Player_State_3 :: bit_field u64 { which: Which_Player_State | 1, c_state: Wrassler_Player_State | 63, }
// Striker variant.
_Player_State_4 :: bit_field u64 { which: Which_Player_State | 1, c_state: Striker_Player_State | 63, }
// CounterMan variant.
_Player_State_5 :: bit_field u64 { which: Which_Player_State | 1, c_state: CounterMan_Player_State | 63, }
// Overlays all five bit_fields in one #raw_union; each member is declared with `using`,
// and main accesses `which`, `p_state` and `c_state` directly on a Player_State value.
Player_State :: struct #raw_union { using _: _Player_State_1, using _: _Player_State_2, using _: _Player_State_3, using _: _Player_State_4, using _: _Player_State_5, }
// u64-backed bit_field holding a single 32-bit i32 field.
Cursed_1 :: bit_field u64 { uh_oh: i32 | 32, }
// u64-backed bit_field holding a single 8-bit u8 field. Not referenced elsewhere in the visible code.
Cursed_2 :: bit_field u64 { uh_oh: u8 | 8, }
// #raw_union wrapping Cursed_1 via `using`. Not referenced elsewhere in the visible code.
Cursed :: struct #raw_union { using _:Cursed_1 }
// Groups a state counter, a Character and a Player_State. Not referenced elsewhere in the visible code.
Player_Data :: struct { state_counter: i32, character: Character, current_state: Player_State, }
// Scalar lookup: returns the index of the first entry of `dur` that `state_counter`
// does not exceed.
//
// Inputs:
//   dur           - values to scan, in order
//   state_counter - value compared against each entry
//
// Returns the matching index as i32, or 0 when `dur` is empty or `state_counter`
// exceeds every entry.
GET_ANIM_IDX_ORIGINAL :: proc (dur:[]i32, state_counter:i32) -> i32 {
	for threshold, position in dur {
		// First entry that is >= state_counter wins.
		if state_counter <= threshold {
			return cast(i32)position
		}
	}
	// Nothing matched: same zero default the lookup starts from.
	return 0
}
// Four-lane i32 SIMD constant holding the same values as HBOX_DURATIONS_CLASSIC[._5S];
// main passes it to GET_ANIM_IDX_SIMD.
_5S_SIMD :: #simd[4]i32{11, 14, 18, 27,}
import "core:simd"
// SIMD lookup that main checks against the same expected_5S_index table as
// GET_ANIM_IDX_ORIGINAL. This is the procedure named in the "LLVM CODE GEN FAILED"
// output quoted in this report; it is left unchanged so the report still reproduces.
//
// Inputs:
//   dur           - four i32 lanes to compare against
//   state_counter - scalar i32 counter
//
// Returns the lane-wise comparison mask, negated and summed across lanes, transmuted to i32.
GET_ANIM_IDX_SIMD :: proc (dur:#simd[4]i32, state_counter:i32) -> i32 {
// Copies state_counter into all four lanes. INDICES is not referenced again in this procedure.
INDICES : #simd[4]i32 = {state_counter, state_counter, state_counter, state_counter}
// simd.lanes_gt is called with the vector `dur` and the *scalar* `state_counter`. The quoted IR
// shows this lowered to `icmp sgt <4 x i32> %10, i32 %1`, which LLVM rejects with
// "Both operands to ICmp instruction are not of the same type!".
// NOTE(review): INDICES looks like the intended second operand — confirm.
// NOTE(review): GET_ANIM_IDX_ORIGINAL skips entries that state_counter exceeds, whereas this counts
// lanes where dur > state_counter; verify the comparison direction against expected_5S_index.
// The mask is then negated (simd.neg) and summed across lanes (simd.reduce_add_ordered).
GT := simd.lanes_gt(dur, state_counter) GT = simd.neg(GT) result := simd.reduce_add_ordered(GT) return transmute(i32)result }
// import "shared:prof"
// Expected lookup result for every state counter value 0..=27 when scanning the `_5S`
// values {11, 14, 18, 27}; main asserts both lookup procedures against this table.
expected_5S_index := [28]i32 {
	0..=11  = 0,
	12..=14 = 1,
	15..=18 = 2,
	19..=27 = 3,
}
// Entry point of the repro: checks both lookup procedures against expected_5S_index,
// then runs each of them through a one-million-iteration loop over counters 0..=27.
main :: proc() {
	fmt.printf("Hello, world! Your Odin project is set up.\n")

	// Write through the raw-union bit_field accessors; `which` is printed at the end.
	player_state: Player_State
	player_state.which = ._Universal
	player_state.p_state = ._5
	player_state.c_state = ._5S

	// Correctness check: scalar lookup.
	for counter in cast(i32)0..=27 {
		got := GET_ANIM_IDX_ORIGINAL(HBOX_DURATIONS_CLASSIC[._5S], counter)
		assert(got == expected_5S_index[counter])
	}
	fmt.printf("Original test passed!")

	// Correctness check: SIMD lookup.
	for counter in cast(i32)0..=27 {
		got := GET_ANIM_IDX_SIMD(_5S_SIMD, counter)
		assert(got == expected_5S_index[counter])
	}
	fmt.printf("SIMD test passed!")

	result: i32
	// prof.TIME_TYPE = .micros

	fmt.printf("Original algorithm: ")
	for _ in 0..<1_000_000 {
		for counter in cast(i32)0..=27 {
			// prof.profile_start(.micros)
			result = GET_ANIM_IDX_ORIGINAL(HBOX_DURATIONS_CLASSIC[._5S], counter)
			// prof.profile_end()
			assert(result != -1)
		}
	}
	// prof.average_profiling_data()

	fmt.printf("SIMD algorithm: ")
	for _ in 0..<1_000_000 {
		for counter in cast(i32)0..=27 {
			// prof.profile_start(.micros)
			result = GET_ANIM_IDX_SIMD(_5S_SIMD, counter)
			// prof.profile_end()
			assert(result != -1)
		}
	}
	// prof.average_profiling_data()

	fmt.printf("cursed %v \n", player_state.which)
}