wgpu icon indicating copy to clipboard operation
wgpu copied to clipboard

[spv-in] Naga miscompiles SPIR-V with branches that exit more than one level of structured control flow

Open DJMcNab opened this issue 4 years ago • 10 comments

Using rust-gpu, this shader:

#[spirv(fragment)]
pub fn main_fs(output: &mut Vec4) {
    let mut iter = 0..1;
    loop {
        match iter.next() {
            Some(_) => (),
            None => break,
        };
    }

    *output = vec4(1.0, 0.0, 0.0, 1.0);
}

Notice that this is the core of what a for loop generated

The downloadable SPIRV and disassembly

A built version of the shader is here: simplest_shader.zip)

And the dissasembly is:

; SPIR-V
; Version: 1.3
; Generator: Unknown(27); 0
; Bound: 92
; Schema: 0
               OpCapability Shader
               OpCapability VulkanMemoryModel
               OpExtension "SPV_KHR_vulkan_memory_model"
               OpMemoryModel Logical Vulkan
               OpEntryPoint Fragment %1 "main_fs" %2
               OpEntryPoint Vertex %3 "main_vs" %gl_VertexIndex %gl_Position
               OpExecutionMode %1 OriginUpperLeft
               OpMemberDecorate %_struct_17 0 Offset 0
               OpMemberDecorate %_struct_17 1 Offset 4
               OpMemberDecorate %_struct_18 0 Offset 0
               OpMemberDecorate %_struct_18 1 Offset 4
               OpDecorate %2 Location 0
               OpDecorate %gl_VertexIndex BuiltIn VertexIndex
               OpDecorate %gl_Position BuiltIn Position
               OpDecorate %gl_Position Invariant
        %int = OpTypeInt 32 1
%_ptr_Input_int = OpTypePointer Input %int
       %void = OpTypeVoid
       %uint = OpTypeInt 32 0
      %float = OpTypeFloat 32
    %v4float = OpTypeVector %float 4
 %_struct_17 = OpTypeStruct %uint %int
 %_struct_18 = OpTypeStruct %int %int
       %bool = OpTypeBool
%_ptr_Output_v4float = OpTypePointer Output %v4float
         %27 = OpTypeFunction %void
          %2 = OpVariable %_ptr_Output_v4float Output
%gl_VertexIndex = OpVariable %_ptr_Input_int Input
%gl_Position = OpVariable %_ptr_Output_v4float Output
     %uint_1 = OpConstant %uint 1
     %uint_0 = OpConstant %uint 0
         %30 = OpUndef %_struct_17
      %int_0 = OpConstant %int 0
      %int_1 = OpConstant %int 1
    %float_1 = OpConstant %float 1
    %float_0 = OpConstant %float 0
      %int_2 = OpConstant %int 2
       %true = OpConstantTrue %bool
      %false = OpConstantFalse %bool
         %89 = OpConstantComposite %_struct_18 %int_0 %int_1
         %91 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1
          %1 = OpFunction %void None %27
         %37 = OpLabel
               OpSelectionMerge %79 None
               OpSwitch %uint_0 %80
         %80 = OpLabel
               OpBranch %40
         %40 = OpLabel
         %41 = OpPhi %_struct_17 %30 %80 %42 %43
         %44 = OpPhi %_struct_18 %89 %80 %45 %43
               OpLoopMerge %48 %43 None
               OpBranch %49
         %49 = OpLabel
         %50 = OpCompositeExtract %int %44 0
         %51 = OpCompositeExtract %int %44 1
         %52 = OpSLessThan %bool %50 %51
               OpSelectionMerge %53 None
               OpBranchConditional %52 %54 %55
         %55 = OpLabel
         %62 = OpCompositeInsert %_struct_17 %uint_0 %41 0
               OpBranch %53
         %54 = OpLabel
         %58 = OpIAdd %int %50 %int_1
         %60 = OpCompositeInsert %_struct_18 %58 %44 0
         %61 = OpCompositeConstruct %_struct_17 %uint_1 %50
               OpBranch %53
         %53 = OpLabel
         %42 = OpPhi %_struct_17 %61 %54 %62 %55
         %45 = OpPhi %_struct_18 %60 %54 %44 %55
         %63 = OpCompositeExtract %uint %42 0
         %64 = OpBitcast %int %63
               OpSelectionMerge %68 None
               OpSwitch %64 %66 0 %67 1 %68
         %68 = OpLabel
               OpBranch %43
         %67 = OpLabel
               OpStore %2 %91
               OpBranch %48
         %66 = OpLabel
               OpBranch %48
         %43 = OpLabel
               OpBranch %40
         %48 = OpLabel
         %86 = OpPhi %bool %false %66 %true %67
               OpSelectionMerge %84 None
               OpBranchConditional %86 %79 %84
         %84 = OpLabel
               OpBranch %79
         %79 = OpLabel
               OpReturn
               OpFunctionEnd
          %3 = OpFunction %void None %27
         %70 = OpLabel
         %71 = OpLoad %int %gl_VertexIndex
         %72 = OpISub %int %71 %int_1
         %73 = OpConvertSToF %float %72
         %74 = OpBitwiseAnd %int %71 %int_1
         %75 = OpIMul %int %74 %int_2
         %76 = OpISub %int %75 %int_1
         %77 = OpConvertSToF %float %76
         %78 = OpCompositeConstruct %v4float %73 %77 %float_0 %float_1
               OpStore %gl_Position %78
               OpReturn
               OpFunctionEnd

Loops infinitely in the naga output, but not when using the passthrough/'raw spirv' feature. Notice that this is the core of a

According to spirv-cross, the equivalent GLSL is:

#version 450

struct _6
{
    uint _m0;
    int _m1;
};

struct _7
{
    int _m0;
    int _m1;
};

layout(location = 0) out vec4 _2;

_6 _30;

void main()
{
    do
    {
        _6 _41;
        _7 _44;
        _41 = _30;
        _44 = _7(0, 1);
        _6 _42;
        _7 _45;
        bool _86;
        for (;;)
        {
            if (_44._m0 < _44._m1)
            {
                _7 _60 = _44;
                _60._m0 = _44._m0 + 1;
                _42 = _6(1u, _44._m0);
                _45 = _60;
            }
            else
            {
                _6 _62 = _41;
                _62._m0 = 0u;
                _42 = _62;
                _45 = _44;
            }
            bool _53_ladder_break = false;
            switch (int(_42._m0))
            {
                case 0:
                {
                    _2 = vec4(1.0, 0.0, 0.0, 1.0);
                    _86 = true;
                    _53_ladder_break = true;
                    break;
                }
                default:
                {
                    _86 = false;
                    _53_ladder_break = true;
                    break;
                }
            }
            if (_53_ladder_break)
            {
                break;
            }
            _41 = _42;
            _44 = _45;
            continue;
        }
        if (_86)
        {
            break;
        }
        break;
    } while(false);
}

Notice the _53_ladder_break. Naga however generates:

#version 310 es

precision highp float;
precision highp int;

struct type_5 {
    uint member;
    int member_1;
};

struct type_6 {
    int member;
    int member_1;
};

vec4 global = vec4(0.0, 0.0, 0.0, 0.0);

layout(location = 0) out vec4 _fs2p_location0;

void function() {
    type_5 phi_41_;
    type_6 phi_44_;
    type_5 phi_42_;
    type_6 phi_45_;
    bool phi_86_ = false;
    switch(int(0u)) {
        default:
            phi_41_ = type_5(0u, 0);
            phi_44_ = type_6(0, 1);
            bool loop_init = true;
            while(true) {
                if (!loop_init) {
                phi_41_ = phi_42_;
                phi_44_ = phi_45_;
                }
                loop_init = false;
                type_5 _e21 = phi_41_;
                type_6 _e23 = phi_44_;
                if ((_e23.member < _e23.member_1)) {
                    phi_42_ = type_5(1u, _e23.member);
                    phi_45_ = type_6((_e23.member + 1), _e23.member_1);
                } else {
                    phi_42_ = type_5(0u, _e21.member_1);
                    phi_45_ = _e23;
                }
                type_5 _e36 = phi_42_;
                type_6 _e38 = phi_45_;
                switch(int(_e36.member)) {
                    case 0:
                        global = vec4(1.0, 0.0, 0.0, 1.0);
                        phi_86_ = true;
                        break;
                    case 1:
                        break;
                    default:
                        phi_86_ = false;
                        break;
                }
                continue;
            }
            bool _e42 = phi_86_;
            if (_e42) {
                break;
            }
            break;
    }
    return;
}

void main() {
    function();
    vec4 _e1 = global;
    _fs2p_location0 = _e1;
    return;
}

where the innermost loop does not have an exit condition.

The merge block of the innermost switch is the branch when the discriminant is zero (the Some case),

In every other case, the function exits, eventually.

That is, effectively we have

'loop:
loop {
let is_first_time = ...;
 switch is_first_time {
     case 0:
     continue 'loop;
     case 1:
     write_out();
     break 'switch;
     default:
     break 'switch;
  }
  return;
}

But naga is confusing the continue with another break.

It's also possible that rust-gpu's codegen is wrong here. In:

%64 = OpBitcast %int %63
               OpSelectionMerge %68 None
               OpSwitch %64 %66 0 %67 1 %68
         %68 = OpLabel
               OpBranch %43
         %67 = OpLabel
               OpStore %2 %91
               OpBranch %48
         %66 = OpLabel
               OpBranch %48
         %43 = OpLabel
               OpBranch %40
         %48 = OpLabel

the multiple jumps to %48 seem suspect to me, since that is not the merge block.

DJMcNab avatar Dec 05 '21 19:12 DJMcNab

the multiple jumps to %48 seem suspect to me, since that is not the merge block.

I'm guessing they're loop breaks, because of OpLoopMerge %48 %43 None? Given "a break block is valid only for the innermost loop it is nested inside of" in the spec, but no limitation around the loop break being nested in a switch (though the other way is limited), I think this is legal?

eddyb avatar Dec 05 '21 20:12 eddyb

Thank you for filing! Summoning our SPIR-V expects @JCapucho and @jimblandy

kvark avatar Dec 06 '21 03:12 kvark

Just FYI, these confusing

OpSelectionMerge %79 None
OpSwitch %uint_0 %8 0

are added by spirv-opt's --merge-return pass (used with -O):

https://github.com/KhronosGroup/SPIRV-Tools/blob/1082de6bb32fc425623a6a218c5295bfd4db9a09/source/opt/merge_return_pass.h#L45-L57

I'm encountering this problem with rust-gpu shader that peforms an OpKill.

expenses avatar May 02 '22 21:05 expenses

As a temporary workaround, I've written an optimisation pass that removed the redundant branch: https://github.com/expenses/spirv-extra-opt-passes/blob/194f01af2957eab95195021f8e86e1cf5a7f0884/src/lib.rs#L841-L902. It's a bit hacked together and probably won't work outside of very simple cases because I don't correct the new merge block in subsequent OpSelectionMerges. I'm not 100% sure this is possible though, the --merge-return pass might exist for a reason :P

expenses avatar May 02 '22 21:05 expenses

That SPIR-V validates fine, so we can assume the breaks follow the structured control flow rules, and our front end ought to be handling it. (Naga does not fully validate SPIR-V; that's too big a job.)

The OpSwitch has three cases: %66, %67, and %68, which is both the merge block and the 1 case. %66 and %67 end with branches to %48, which is the containing loop's merge block, whereas %68 branches to the loop's continue block, so it goes around the loop again.

The SPIR-V front end records all merge blocks, so it recognizes that the OpBranch instructions at the ends of %66 and %67 are branching to the loop merge block, and calls merger to add the appropriate ending to body.

But I think the problem is here, where we try to add the right BodyFragment to the end of the Body that represents the switch case:

MergeBlockInformation::LoopMerge | MergeBlockInformation::SwitchMerge => {
    BodyFragment::Break
}

(See the docs on Body and BodyFragment for background.)

A BodyFragment::Break doesn't carry enough information to distinguish between a break from the enclosing switch and a break out of the nearest enclosing loop. SPIR-V allows both (as the structured control flow rules explain), whereas Naga IR only allows a break out of the innermost enclosing loop or switch.

Changing Naga IR to provide "break from innermost loop" would require supporting that IR in all the back ends. I think it would be better for the SPIR-V front end to introduce a bool variable and do a conditional break after the switch.

cc @JCapucho

jimblandy avatar May 03 '22 17:05 jimblandy

A BodyFragment::Break doesn't carry enough information to distinguish between a break from the enclosing switch and a break out of the nearest enclosing loop. SPIR-V allows both (as the structured control flow rules explain), whereas Naga IR only allows a break out of the innermost enclosing loop or switch.

Changing Naga IR to provide "break from innermost loop" would require supporting that IR in all the back ends. I think it would be better for the SPIR-V front end to introduce a bool variable and do a conditional break after the switch.

I agree with you in everything, this will probably need us to maintain depth information for continues and breaks (something that we discussed initially but didn't implement because it didn't seem necessary, didn't age well), the hardest part will be adding the Ifs to the block since we need to possibly add it in multiple places.

I'm not planning to work on this issue in the near future but if anyone wants guidance I'm available.

JCapucho avatar May 05 '22 18:05 JCapucho

Note that since we last looked at this, the section on "Structured control flow" in the SPIR-V section has been completely rewritten. Double-check me on this, but I think the new language says that, although it is okay to break out of a loop from within enclosed selection and switch constructs, you can't break out of more than one level of loop.

This is equivalent to having two kinds of break: loop breaks, and switch breaks. You don't need labels or depth counts or anything like that, to capture this.

I'm not sure that really simplifies anything. The back ends other than SPIR-V still can't express this, so I think it still makes sense to keep it out of Naga IR, and make the front end responsible for injecting the bools and branches.

jimblandy avatar Mar 22 '23 05:03 jimblandy

This is equivalent to having two kinds of break: loop breaks, and switch breaks.

I agree, but also I would go further and say that conflating the two is a syntactic confusion comparable to conflating break and return etc. - the only common thing between loop break and switch break is that they're both a forward branch, but you can say the same thing about continue (forward branch to the continuing {...} section).

I think it still makes sense to keep it out of Naga IR

I agree, though for a somewhat different reason

(actually "structured" control-flow disallows early exits of any kind and is much closer to e.g. SESE control-flow graphs, or functional if_then_else: (bool, T, T -> U, T -> U) -> U/do_while: (T, T -> (bool, T)) -> T combinators - SPIR-V has made a mess of the concept by taking unstructured GLSL control-flow and slapping a different sticker on it a la "goto considered dangerous" - I believe NIR is much stricter and has no returns or switch breaks, and it only kept loop breaks because those tend to be implemented by masking off the invocations that are breaking, with the loop actually finishing when all invocations have hit a break separately - it's almost like a loop-scoped OpKill, than actual control-flow, with the loop condition actually being "is everyone dead" - but I digress...)


I may want to take this on, if I can confirm spirv-opt easily generates this even in the absence of Rust-GPU.

eddyb avatar Apr 27 '23 10:04 eddyb

Managed to make a pure Naga+spirv-opt repro:

  • loop-break-from-switch.wgsl:
    @fragment
    fn main(@location(0) dyn_case: i32) -> @location(0) i32 {
        loop {
            switch(dyn_case) {
                case 0: {
                    return 0;
                }
                default: {
                    break;
                }
            }
            return -9;
        }
        return -9999;
    }
    
  • naga loop-break-from-switch.{wgsl,spv}
    spirv-opt -O loop-break-from-switch.spv -o loop-break-from-switch.opt.spv
    naga loop-break-from-switch.opt.{spv,wgsl}
    
  • loop-break-from-switch.opt.wgsl:
    var<private> global: i32;
    var<private> global_1: i32;
    
    fn function() {
        let _e8 = global;
        loop {
            switch _e8 {
                case 0: {
                    global_1 = 0;
                    break;
                }
                default: {
                    break;
                }
            }
            global_1 = -9;
            break;
        }
        return;
    }
    
    @fragment 
    fn main(@location(0) param: i32) -> @location(0) i32 {
        global = param;
        function();
        let _e3 = global_1;
        return _e3;
    }
    

It's pretty clear that the WGSL post-spirv-opt always returns -9, despite that not being the case originally (and it could be worse, e.g. the loop may become infinite, if the return -9; wasn't there, etc.).

eddyb avatar Apr 27 '23 17:04 eddyb

I think I have hit another reproduction of this. The following slang

[vk::binding(0, 0)]
RWStructuredBuffer<uint32_t> data;

[shader("compute")]
void main()
{
    for (uint32_t i = 0; i < 2; i++)
    {
        if (data[i] == 1)
        {
            continue;
        }
        if (data[i] == 2)
        {
            break;
        }
        data[i] = i * 2; // Example operation: double each element
    }
}

Generates the following spirv:

SPIR-V
; SPIR-V
; Version: 1.5
; Generator: Khronos Slang Compiler; 0
; Bound: 39
; Schema: 0
               OpCapability Shader
               OpExtension "SPV_KHR_storage_buffer_storage_class"
               OpMemoryModel Logical GLSL450
               OpEntryPoint GLCompute %main "main" %data
               OpExecutionMode %main LocalSize 1 1 1
               OpSource Slang 1
               OpName %RWStructuredBuffer "RWStructuredBuffer"
               OpName %data "data"
               OpName %main "main"
               OpDecorate %_runtimearr_uint ArrayStride 4
               OpDecorate %RWStructuredBuffer Block
               OpMemberDecorate %RWStructuredBuffer 0 Offset 0
               OpDecorate %data Binding 0
               OpDecorate %data DescriptorSet 0
       %void = OpTypeVoid
          %6 = OpTypeFunction %void
       %uint = OpTypeInt 32 0
     %uint_0 = OpConstant %uint 0
       %bool = OpTypeBool
     %uint_4 = OpConstant %uint 4
        %int = OpTypeInt 32 1
      %int_0 = OpConstant %int 0
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
%_runtimearr_uint = OpTypeRuntimeArray %uint
%RWStructuredBuffer = OpTypeStruct %_runtimearr_uint
%_ptr_StorageBuffer_RWStructuredBuffer = OpTypePointer StorageBuffer %RWStructuredBuffer
     %uint_1 = OpConstant %uint 1
     %uint_2 = OpConstant %uint 2
       %data = OpVariable %_ptr_StorageBuffer_RWStructuredBuffer StorageBuffer
       %main = OpFunction %void None %6
         %17 = OpLabel
               OpBranch %18
         %18 = OpLabel
         %19 = OpPhi %uint %uint_0 %17 %20 %21
               OpLoopMerge %22 %21 None
               OpBranch %23
         %23 = OpLabel
               OpSelectionMerge %24 None
               OpSwitch %int_0 %25
         %25 = OpLabel
         %26 = OpULessThan %bool %19 %uint_4
               OpSelectionMerge %27 None
               OpBranchConditional %26 %27 %28
         %28 = OpLabel
               OpBranch %22
         %27 = OpLabel
         %29 = OpAccessChain %_ptr_StorageBuffer_uint %data %int_0 %19
         %30 = OpLoad %uint %29
         %31 = OpIEqual %bool %30 %uint_1
               OpSelectionMerge %32 None
               OpBranchConditional %31 %33 %32
         %33 = OpLabel
               OpBranch %24
         %32 = OpLabel
         %34 = OpLoad %uint %29
         %35 = OpIEqual %bool %34 %uint_2
               OpSelectionMerge %36 None
               OpBranchConditional %35 %37 %36
         %37 = OpLabel
               OpBranch %22
         %36 = OpLabel
         %38 = OpIMul %uint %19 %uint_2
               OpStore %29 %38
               OpBranch %24
         %24 = OpLabel
         %20 = OpIAdd %uint %19 %uint_1
               OpBranch %21
         %21 = OpLabel
               OpBranch %18
         %22 = OpLabel
               OpReturn
               OpFunctionEnd

Which is translated to the following wgsl (or hlsl/msl/etc)

struct RWStructuredBuffer {
    member: array<u32>,
}

@group(0) @binding(0) 
var<storage, read_write> data: RWStructuredBuffer;

fn main_1() {
    var phi_19_: u32;

    phi_19_ = 0u;
    loop {
        let _e7 = phi_19_;
        switch 0i {
            default: {
                if (_e7 < 4u) {
                } else {
                    break;
                }
                let _e11 = data.member[_e7];
                if (_e11 == 1u) {
                    break;
                }
                let _e13 = data.member[_e7];
                if (_e13 == 2u) {
                    break;
                }
                data.member[_e7] = (_e7 * 2u);
                break;
            }
        }
        continue;
        continuing {
            phi_19_ = (_e7 + 1u);
        }
    }
    return;
}

@compute @workgroup_size(1, 1, 1) 
fn main() {
    main_1();
}

Which isn't correct as it does not distinguish between break and continue

cwfitzgerald avatar Jun 12 '25 20:06 cwfitzgerald