wgpu
wgpu copied to clipboard
[spv-in] Naga miscompiles SPIR-V with branches that exit more than one level of structured control flow
Using rust-gpu, this shader:
#[spirv(fragment)]
pub fn main_fs(output: &mut Vec4) {
let mut iter = 0..1;
loop {
match iter.next() {
Some(_) => (),
None => break,
};
}
*output = vec4(1.0, 0.0, 0.0, 1.0);
}
Notice that this is the core of what a for loop generated
The downloadable SPIRV and disassembly
A built version of the shader is here: simplest_shader.zip)
And the dissasembly is:
; SPIR-V
; Version: 1.3
; Generator: Unknown(27); 0
; Bound: 92
; Schema: 0
OpCapability Shader
OpCapability VulkanMemoryModel
OpExtension "SPV_KHR_vulkan_memory_model"
OpMemoryModel Logical Vulkan
OpEntryPoint Fragment %1 "main_fs" %2
OpEntryPoint Vertex %3 "main_vs" %gl_VertexIndex %gl_Position
OpExecutionMode %1 OriginUpperLeft
OpMemberDecorate %_struct_17 0 Offset 0
OpMemberDecorate %_struct_17 1 Offset 4
OpMemberDecorate %_struct_18 0 Offset 0
OpMemberDecorate %_struct_18 1 Offset 4
OpDecorate %2 Location 0
OpDecorate %gl_VertexIndex BuiltIn VertexIndex
OpDecorate %gl_Position BuiltIn Position
OpDecorate %gl_Position Invariant
%int = OpTypeInt 32 1
%_ptr_Input_int = OpTypePointer Input %int
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_struct_17 = OpTypeStruct %uint %int
%_struct_18 = OpTypeStruct %int %int
%bool = OpTypeBool
%_ptr_Output_v4float = OpTypePointer Output %v4float
%27 = OpTypeFunction %void
%2 = OpVariable %_ptr_Output_v4float Output
%gl_VertexIndex = OpVariable %_ptr_Input_int Input
%gl_Position = OpVariable %_ptr_Output_v4float Output
%uint_1 = OpConstant %uint 1
%uint_0 = OpConstant %uint 0
%30 = OpUndef %_struct_17
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%float_1 = OpConstant %float 1
%float_0 = OpConstant %float 0
%int_2 = OpConstant %int 2
%true = OpConstantTrue %bool
%false = OpConstantFalse %bool
%89 = OpConstantComposite %_struct_18 %int_0 %int_1
%91 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1
%1 = OpFunction %void None %27
%37 = OpLabel
OpSelectionMerge %79 None
OpSwitch %uint_0 %80
%80 = OpLabel
OpBranch %40
%40 = OpLabel
%41 = OpPhi %_struct_17 %30 %80 %42 %43
%44 = OpPhi %_struct_18 %89 %80 %45 %43
OpLoopMerge %48 %43 None
OpBranch %49
%49 = OpLabel
%50 = OpCompositeExtract %int %44 0
%51 = OpCompositeExtract %int %44 1
%52 = OpSLessThan %bool %50 %51
OpSelectionMerge %53 None
OpBranchConditional %52 %54 %55
%55 = OpLabel
%62 = OpCompositeInsert %_struct_17 %uint_0 %41 0
OpBranch %53
%54 = OpLabel
%58 = OpIAdd %int %50 %int_1
%60 = OpCompositeInsert %_struct_18 %58 %44 0
%61 = OpCompositeConstruct %_struct_17 %uint_1 %50
OpBranch %53
%53 = OpLabel
%42 = OpPhi %_struct_17 %61 %54 %62 %55
%45 = OpPhi %_struct_18 %60 %54 %44 %55
%63 = OpCompositeExtract %uint %42 0
%64 = OpBitcast %int %63
OpSelectionMerge %68 None
OpSwitch %64 %66 0 %67 1 %68
%68 = OpLabel
OpBranch %43
%67 = OpLabel
OpStore %2 %91
OpBranch %48
%66 = OpLabel
OpBranch %48
%43 = OpLabel
OpBranch %40
%48 = OpLabel
%86 = OpPhi %bool %false %66 %true %67
OpSelectionMerge %84 None
OpBranchConditional %86 %79 %84
%84 = OpLabel
OpBranch %79
%79 = OpLabel
OpReturn
OpFunctionEnd
%3 = OpFunction %void None %27
%70 = OpLabel
%71 = OpLoad %int %gl_VertexIndex
%72 = OpISub %int %71 %int_1
%73 = OpConvertSToF %float %72
%74 = OpBitwiseAnd %int %71 %int_1
%75 = OpIMul %int %74 %int_2
%76 = OpISub %int %75 %int_1
%77 = OpConvertSToF %float %76
%78 = OpCompositeConstruct %v4float %73 %77 %float_0 %float_1
OpStore %gl_Position %78
OpReturn
OpFunctionEnd
Loops infinitely in the naga output, but not when using the passthrough/'raw spirv' feature. Notice that this is the core of a
According to spirv-cross, the equivalent GLSL is:
#version 450
struct _6
{
uint _m0;
int _m1;
};
struct _7
{
int _m0;
int _m1;
};
layout(location = 0) out vec4 _2;
_6 _30;
void main()
{
do
{
_6 _41;
_7 _44;
_41 = _30;
_44 = _7(0, 1);
_6 _42;
_7 _45;
bool _86;
for (;;)
{
if (_44._m0 < _44._m1)
{
_7 _60 = _44;
_60._m0 = _44._m0 + 1;
_42 = _6(1u, _44._m0);
_45 = _60;
}
else
{
_6 _62 = _41;
_62._m0 = 0u;
_42 = _62;
_45 = _44;
}
bool _53_ladder_break = false;
switch (int(_42._m0))
{
case 0:
{
_2 = vec4(1.0, 0.0, 0.0, 1.0);
_86 = true;
_53_ladder_break = true;
break;
}
default:
{
_86 = false;
_53_ladder_break = true;
break;
}
}
if (_53_ladder_break)
{
break;
}
_41 = _42;
_44 = _45;
continue;
}
if (_86)
{
break;
}
break;
} while(false);
}
Notice the _53_ladder_break. Naga however generates:
#version 310 es
precision highp float;
precision highp int;
struct type_5 {
uint member;
int member_1;
};
struct type_6 {
int member;
int member_1;
};
vec4 global = vec4(0.0, 0.0, 0.0, 0.0);
layout(location = 0) out vec4 _fs2p_location0;
void function() {
type_5 phi_41_;
type_6 phi_44_;
type_5 phi_42_;
type_6 phi_45_;
bool phi_86_ = false;
switch(int(0u)) {
default:
phi_41_ = type_5(0u, 0);
phi_44_ = type_6(0, 1);
bool loop_init = true;
while(true) {
if (!loop_init) {
phi_41_ = phi_42_;
phi_44_ = phi_45_;
}
loop_init = false;
type_5 _e21 = phi_41_;
type_6 _e23 = phi_44_;
if ((_e23.member < _e23.member_1)) {
phi_42_ = type_5(1u, _e23.member);
phi_45_ = type_6((_e23.member + 1), _e23.member_1);
} else {
phi_42_ = type_5(0u, _e21.member_1);
phi_45_ = _e23;
}
type_5 _e36 = phi_42_;
type_6 _e38 = phi_45_;
switch(int(_e36.member)) {
case 0:
global = vec4(1.0, 0.0, 0.0, 1.0);
phi_86_ = true;
break;
case 1:
break;
default:
phi_86_ = false;
break;
}
continue;
}
bool _e42 = phi_86_;
if (_e42) {
break;
}
break;
}
return;
}
void main() {
function();
vec4 _e1 = global;
_fs2p_location0 = _e1;
return;
}
where the innermost loop does not have an exit condition.
The merge block of the innermost switch is the branch when the discriminant is zero (the Some case),
In every other case, the function exits, eventually.
That is, effectively we have
'loop:
loop {
let is_first_time = ...;
switch is_first_time {
case 0:
continue 'loop;
case 1:
write_out();
break 'switch;
default:
break 'switch;
}
return;
}
But naga is confusing the continue with another break.
It's also possible that rust-gpu's codegen is wrong here. In:
%64 = OpBitcast %int %63
OpSelectionMerge %68 None
OpSwitch %64 %66 0 %67 1 %68
%68 = OpLabel
OpBranch %43
%67 = OpLabel
OpStore %2 %91
OpBranch %48
%66 = OpLabel
OpBranch %48
%43 = OpLabel
OpBranch %40
%48 = OpLabel
the multiple jumps to %48 seem suspect to me, since that is not the merge block.
the multiple jumps to
%48seem suspect to me, since that is not the merge block.
I'm guessing they're loop breaks, because of OpLoopMerge %48 %43 None?
Given "a break block is valid only for the innermost loop it is nested inside of" in the spec, but no limitation around the loop break being nested in a switch (though the other way is limited), I think this is legal?
Thank you for filing! Summoning our SPIR-V expects @JCapucho and @jimblandy
Just FYI, these confusing
OpSelectionMerge %79 None
OpSwitch %uint_0 %8 0
are added by spirv-opt's --merge-return pass (used with -O):
https://github.com/KhronosGroup/SPIRV-Tools/blob/1082de6bb32fc425623a6a218c5295bfd4db9a09/source/opt/merge_return_pass.h#L45-L57
I'm encountering this problem with rust-gpu shader that peforms an OpKill.
As a temporary workaround, I've written an optimisation pass that removed the redundant branch: https://github.com/expenses/spirv-extra-opt-passes/blob/194f01af2957eab95195021f8e86e1cf5a7f0884/src/lib.rs#L841-L902. It's a bit hacked together and probably won't work outside of very simple cases because I don't correct the new merge block in subsequent OpSelectionMerges. I'm not 100% sure this is possible though, the --merge-return pass might exist for a reason :P
That SPIR-V validates fine, so we can assume the breaks follow the structured control flow rules, and our front end ought to be handling it. (Naga does not fully validate SPIR-V; that's too big a job.)
The OpSwitch has three cases: %66, %67, and %68, which is both the merge block and the 1 case. %66 and %67 end with branches to %48, which is the containing loop's merge block, whereas %68 branches to the loop's continue block, so it goes around the loop again.
The SPIR-V front end records all merge blocks, so it recognizes that the OpBranch instructions at the ends of %66 and %67 are branching to the loop merge block, and calls merger to add the appropriate ending to body.
But I think the problem is here, where we try to add the right BodyFragment to the end of the Body that represents the switch case:
MergeBlockInformation::LoopMerge | MergeBlockInformation::SwitchMerge => {
BodyFragment::Break
}
(See the docs on Body and BodyFragment for background.)
A BodyFragment::Break doesn't carry enough information to distinguish between a break from the enclosing switch and a break out of the nearest enclosing loop. SPIR-V allows both (as the structured control flow rules explain), whereas Naga IR only allows a break out of the innermost enclosing loop or switch.
Changing Naga IR to provide "break from innermost loop" would require supporting that IR in all the back ends. I think it would be better for the SPIR-V front end to introduce a bool variable and do a conditional break after the switch.
cc @JCapucho
A
BodyFragment::Breakdoesn't carry enough information to distinguish between a break from the enclosing switch and a break out of the nearest enclosing loop. SPIR-V allows both (as the structured control flow rules explain), whereas Naga IR only allows a break out of the innermost enclosing loop or switch.Changing Naga IR to provide "break from innermost loop" would require supporting that IR in all the back ends. I think it would be better for the SPIR-V front end to introduce a bool variable and do a conditional break after the switch.
I agree with you in everything, this will probably need us to maintain depth information for continues and breaks (something that we discussed initially but didn't implement because it didn't seem necessary, didn't age well), the hardest part will be adding the Ifs to the block since we need to possibly add it in multiple places.
I'm not planning to work on this issue in the near future but if anyone wants guidance I'm available.
Note that since we last looked at this, the section on "Structured control flow" in the SPIR-V section has been completely rewritten. Double-check me on this, but I think the new language says that, although it is okay to break out of a loop from within enclosed selection and switch constructs, you can't break out of more than one level of loop.
This is equivalent to having two kinds of break: loop breaks, and switch breaks. You don't need labels or depth counts or anything like that, to capture this.
I'm not sure that really simplifies anything. The back ends other than SPIR-V still can't express this, so I think it still makes sense to keep it out of Naga IR, and make the front end responsible for injecting the bools and branches.
This is equivalent to having two kinds of break: loop breaks, and switch breaks.
I agree, but also I would go further and say that conflating the two is a syntactic confusion comparable to conflating break and return etc. - the only common thing between loop break and switch break is that they're both a forward branch, but you can say the same thing about continue (forward branch to the continuing {...} section).
I think it still makes sense to keep it out of Naga IR
I agree, though for a somewhat different reason
(actually "structured" control-flow disallows early exits of any kind and is much closer to e.g. SESE control-flow graphs, or functional if_then_else: (bool, T, T -> U, T -> U) -> U/do_while: (T, T -> (bool, T)) -> T combinators - SPIR-V has made a mess of the concept by taking unstructured GLSL control-flow and slapping a different sticker on it a la "goto considered dangerous" - I believe NIR is much stricter and has no returns or switch breaks, and it only kept loop breaks because those tend to be implemented by masking off the invocations that are breaking, with the loop actually finishing when all invocations have hit a break separately - it's almost like a loop-scoped OpKill, than actual control-flow, with the loop condition actually being "is everyone dead" - but I digress...)
I may want to take this on, if I can confirm spirv-opt easily generates this even in the absence of Rust-GPU.
Managed to make a pure Naga+spirv-opt repro:
loop-break-from-switch.wgsl:@fragment fn main(@location(0) dyn_case: i32) -> @location(0) i32 { loop { switch(dyn_case) { case 0: { return 0; } default: { break; } } return -9; } return -9999; }-
naga loop-break-from-switch.{wgsl,spv} spirv-opt -O loop-break-from-switch.spv -o loop-break-from-switch.opt.spv naga loop-break-from-switch.opt.{spv,wgsl} loop-break-from-switch.opt.wgsl:var<private> global: i32; var<private> global_1: i32; fn function() { let _e8 = global; loop { switch _e8 { case 0: { global_1 = 0; break; } default: { break; } } global_1 = -9; break; } return; } @fragment fn main(@location(0) param: i32) -> @location(0) i32 { global = param; function(); let _e3 = global_1; return _e3; }
It's pretty clear that the WGSL post-spirv-opt always returns -9, despite that not being the case originally (and it could be worse, e.g. the loop may become infinite, if the return -9; wasn't there, etc.).
I think I have hit another reproduction of this. The following slang
[vk::binding(0, 0)]
RWStructuredBuffer<uint32_t> data;
[shader("compute")]
void main()
{
for (uint32_t i = 0; i < 2; i++)
{
if (data[i] == 1)
{
continue;
}
if (data[i] == 2)
{
break;
}
data[i] = i * 2; // Example operation: double each element
}
}
Generates the following spirv:
SPIR-V
; SPIR-V
; Version: 1.5
; Generator: Khronos Slang Compiler; 0
; Bound: 39
; Schema: 0
OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %data
OpExecutionMode %main LocalSize 1 1 1
OpSource Slang 1
OpName %RWStructuredBuffer "RWStructuredBuffer"
OpName %data "data"
OpName %main "main"
OpDecorate %_runtimearr_uint ArrayStride 4
OpDecorate %RWStructuredBuffer Block
OpMemberDecorate %RWStructuredBuffer 0 Offset 0
OpDecorate %data Binding 0
OpDecorate %data DescriptorSet 0
%void = OpTypeVoid
%6 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%uint_4 = OpConstant %uint 4
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
%_runtimearr_uint = OpTypeRuntimeArray %uint
%RWStructuredBuffer = OpTypeStruct %_runtimearr_uint
%_ptr_StorageBuffer_RWStructuredBuffer = OpTypePointer StorageBuffer %RWStructuredBuffer
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%data = OpVariable %_ptr_StorageBuffer_RWStructuredBuffer StorageBuffer
%main = OpFunction %void None %6
%17 = OpLabel
OpBranch %18
%18 = OpLabel
%19 = OpPhi %uint %uint_0 %17 %20 %21
OpLoopMerge %22 %21 None
OpBranch %23
%23 = OpLabel
OpSelectionMerge %24 None
OpSwitch %int_0 %25
%25 = OpLabel
%26 = OpULessThan %bool %19 %uint_4
OpSelectionMerge %27 None
OpBranchConditional %26 %27 %28
%28 = OpLabel
OpBranch %22
%27 = OpLabel
%29 = OpAccessChain %_ptr_StorageBuffer_uint %data %int_0 %19
%30 = OpLoad %uint %29
%31 = OpIEqual %bool %30 %uint_1
OpSelectionMerge %32 None
OpBranchConditional %31 %33 %32
%33 = OpLabel
OpBranch %24
%32 = OpLabel
%34 = OpLoad %uint %29
%35 = OpIEqual %bool %34 %uint_2
OpSelectionMerge %36 None
OpBranchConditional %35 %37 %36
%37 = OpLabel
OpBranch %22
%36 = OpLabel
%38 = OpIMul %uint %19 %uint_2
OpStore %29 %38
OpBranch %24
%24 = OpLabel
%20 = OpIAdd %uint %19 %uint_1
OpBranch %21
%21 = OpLabel
OpBranch %18
%22 = OpLabel
OpReturn
OpFunctionEnd
Which is translated to the following wgsl (or hlsl/msl/etc)
struct RWStructuredBuffer {
member: array<u32>,
}
@group(0) @binding(0)
var<storage, read_write> data: RWStructuredBuffer;
fn main_1() {
var phi_19_: u32;
phi_19_ = 0u;
loop {
let _e7 = phi_19_;
switch 0i {
default: {
if (_e7 < 4u) {
} else {
break;
}
let _e11 = data.member[_e7];
if (_e11 == 1u) {
break;
}
let _e13 = data.member[_e7];
if (_e13 == 2u) {
break;
}
data.member[_e7] = (_e7 * 2u);
break;
}
}
continue;
continuing {
phi_19_ = (_e7 + 1u);
}
}
return;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
main_1();
}
Which isn't correct as it does not distinguish between break and continue