goto between def and use may cause CIR generate multiple definition's code: operand #0 does not dominate this use
Reproducer:
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s
struct def;
typedef struct def *decl;
struct def {
int index;
};
struct def d;
int foo(unsigned char cond)
{
if (cond)
goto label;
{
decl b = &d;
label:
return b->index;
}
return 0;
}
The generated CIR looks like:
cir.func @foo(%arg0: !u8i loc(fused[#loc5, #loc6])) -> !s32i extra(#fn_attr) {
%0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["cond", init] {alignment = 1 : i64} loc(#loc25)
%1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64} loc(#loc4)
cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc7)
cir.scope {
%4 = cir.load %0 : !cir.ptr<!u8i>, !u8i loc(#loc10)
%5 = cir.cast(int_to_bool, %4 : !u8i), !cir.bool loc(#loc10)
cir.if %5 {
cir.goto "label" loc(#loc27)
} loc(#loc27)
} loc(#loc26)
cir.scope {
%4 = cir.alloca !cir.ptr<!ty_def>, !cir.ptr<!cir.ptr<!ty_def>>, ["b", init] {alignment = 8 : i64} loc(#loc29)
%5 = cir.get_global @d : !cir.ptr<!ty_def> loc(#loc23)
cir.store %5, %4 : !cir.ptr<!ty_def>, !cir.ptr<!cir.ptr<!ty_def>> loc(#loc29)
cir.br ^bb1 loc(#loc16)
^bb1: // pred: ^bb0
cir.label "label" loc(#loc16)
%6 = cir.load %4 : !cir.ptr<!cir.ptr<!ty_def>>, !cir.ptr<!ty_def> loc(#loc17)
%7 = cir.get_member %6[0] {name = "index"} : !cir.ptr<!ty_def> -> !cir.ptr<!s32i> loc(#loc18)
%8 = cir.load %7 : !cir.ptr<!s32i>, !s32i loc(#loc19)
cir.store %8, %1 : !s32i, !cir.ptr<!s32i> loc(#loc30)
%9 = cir.load %1 : !cir.ptr<!s32i>, !s32i loc(#loc30)
cir.return %9 : !s32i loc(#loc30)
} loc(#loc28)
%2 = cir.const #cir.int<0> : !s32i loc(#loc21)
cir.store %2, %1 : !s32i, !cir.ptr<!s32i> loc(#loc31)
%3 = cir.load %1 : !cir.ptr<!s32i>, !s32i loc(#loc31)
cir.return %3 : !s32i loc(#loc31)
} loc(#loc24)
We can find the def %4 and %5 are defined twice.
FYI, the generated LLVM IR without CIR pipeline is:
define dso_local i32 @foo(i8 noundef zeroext %cond) #0 {
entry:
%cond.addr = alloca i8, align 1
%b = alloca ptr, align 8
store i8 %cond, ptr %cond.addr, align 1
%0 = load i8, ptr %cond.addr, align 1
%tobool = icmp ne i8 %0, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
br label %label
if.end: ; preds = %entry
store ptr @d, ptr %b, align 8
br label %label
label: ; preds = %if.end, %if.then
%1 = load ptr, ptr %b, align 8
%index = getelementptr inbounds %struct.def, ptr %1, i32 0, i32 0
%2 = load i32, ptr %index, align 4
ret i32 %2
}
It is a UB if cond is true. But we should never crash.
The GotoSolver pass is quite simple now and does not consider cases like this. I can think of two possible ways to fix this:
- Update the
GotoSolverpass and hoist those non-dominating definitions to the common dominator of thegotostatement and the label. - Invent a new pass (or maybe update the existing
FlattenCFGpass?) that runs beforeGotoSolverand hoist allallocas in a function to the entry block of the function.
I personally prefer the second approach since it's simpler and it generates similar code to the upstream clang.
I didn't know how we handle gotos. (2) sounds better solution to me too. It is better to avoid the invalid patterns at the first place.
Or maybe it is better to emit these allocas to the correct place at the very first if possible? It will be better to adjust the "incorrect" IR.
Or maybe it is better to emit these allocas to the correct place at the very first if possible? It will be better to adjust the "incorrect" IR.
The allocas are actually deliberately emitted in their innermost containing scopes now, so that lifetime analysis passes (or any other passes that concern lifetime) could more easily reason about local variable's lifetime.
All these analysis passes run before FlattenCIR, thus we'd better keep allocas in their places now until FlattenCIR.
After revisit the problem, I am wondering if this is a naming problem. I mean, if we can fix the problem by assigning different names to %4 and %5 in the scopes?
After revisit the problem, I am wondering if this is a naming problem. I mean, if we can fix the problem by assigning different names to %4 and %5 in the scopes?
I'm not very sure about what you mean by "names", but if you're referring to changing %4 to %abc or something more meaningful, nope, the name of SSA values should be irrelevant since no MLIR assembly parsing is involved. Although in the CIR you posted there are two SSA values with name %4, since they occur in two unrelated regions, they represent distinct SSA values.
You could make clang dump CIR after the GotoSolver pass by passing -mmlir --mlir-print-ir-after=cir-goto-solver and here is the CIR we get at there:
"cir.func"() <{calling_conv = 1 : i32, dsolocal, extra_attrs = #fn_attr, function_type = !cir.func<!s32i (!u8i)>, global_visibility = #cir<visibility default>, linkage = 0 : i32, sym_name = "foo"}> ({
^bb0(%arg0: !u8i):
%0 = "cir.alloca"() <{alignment = 1 : i64, allocaType = !u8i, init, name = "cond"}> : () -> !cir.ptr<!u8i>
%1 = "cir.alloca"() <{alignment = 4 : i64, allocaType = !s32i, name = "__retval"}> : () -> !cir.ptr<!s32i>
"cir.store"(%arg0, %0) : (!u8i, !cir.ptr<!u8i>) -> ()
"cir.br"()[^bb1] : () -> ()
^bb1: // pred: ^bb0
%2 = "cir.load"(%0) : (!cir.ptr<!u8i>) -> !u8i
%3 = "cir.cast"(%2) <{kind = 1 : i32}> : (!u8i) -> !cir.bool
"cir.brcond"(%3)[^bb2, ^bb3] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (!cir.bool) -> ()
^bb2: // pred: ^bb1
"cir.br"()[^bb6] : () -> ()
^bb3: // pred: ^bb1
"cir.br"()[^bb4] : () -> ()
^bb4: // pred: ^bb3
"cir.br"()[^bb5] : () -> ()
^bb5: // pred: ^bb4
%4 = "cir.alloca"() <{alignment = 8 : i64, allocaType = !cir.ptr<!ty_def>, init, name = "b"}> : () -> !cir.ptr<!cir.ptr<!ty_def>>
%5 = "cir.get_global"() <{name = @d}> : () -> !cir.ptr<!ty_def>
"cir.store"(%5, %4) : (!cir.ptr<!ty_def>, !cir.ptr<!cir.ptr<!ty_def>>) -> ()
"cir.br"()[^bb6] : () -> ()
^bb6: // 2 preds: ^bb2, ^bb5
%6 = "cir.load"(%4) : (!cir.ptr<!cir.ptr<!ty_def>>) -> !cir.ptr<!ty_def> // <=== Problematic operation
%7 = "cir.get_member"(%6) <{index_attr = 0 : index, name = "index"}> : (!cir.ptr<!ty_def>) -> !cir.ptr<!s32i>
%8 = "cir.load"(%7) : (!cir.ptr<!s32i>) -> !s32i
"cir.store"(%8, %1) : (!s32i, !cir.ptr<!s32i>) -> ()
%9 = "cir.load"(%1) : (!cir.ptr<!s32i>) -> !s32i
"cir.return"(%9) : (!s32i) -> ()
^bb7: // no predecessors
%10 = "cir.const"() <{value = #cir.int<0> : !s32i}> : () -> !s32i
"cir.store"(%10, %1) : (!s32i, !cir.ptr<!s32i>) -> ()
%11 = "cir.load"(%1) : (!cir.ptr<!s32i>) -> !s32i
"cir.return"(%11) : (!s32i) -> ()
}) : () -> ()
The problem occurs at the first operation in basic block ^bb6, which uses SSA value %4. But clearly the defining basic block of %4, a.k.a. basic block ^bb5, does not dominate ^bb6, thus the verification error.
Thanks. The name means different definitions. I thought if they mixed the definition somehow. But it looks not the case.
Thanks for filing this issue @ChuanqiXu9, we are also seeing it internally. @Lancern nice explanations!
Invent a new pass (or maybe update the existing FlattenCFG pass?) that runs before GotoSolver and hoist all allocas in a function to the entry block of the function.
+1 here, this has been the initial idea but hasn't been implemented yet. Basically, in FlattenCFG, as we unwrap structured control flow (scopes, etc) we should already move the alloca's up such that anything that gets to the GotoSolver or LLVM, should have the allocas in the entry basic block already.
Ideally, this is where we should also emit lifetime intrinsics (and introduce CIR operations for them) so that they can track the original lifetimes before we lose them by moving the allocas up - so we should put a TODO/FIXME along the code that moves allocas in order to later emit those intrinsics.
I sent https://github.com/llvm/clangir/pull/887