llvm-project icon indicating copy to clipboard operation
llvm-project copied to clipboard

Add support for flag output operand "=@cc" for SystemZ.

Open anoopkg6 opened this issue 1 year ago • 5 comments

Add support for the flag output operand "=@cc" on SystemZ, and optimize conditional branches for the 14 possible combinations of the CC mask.

anoopkg6 avatar Feb 06 '25 00:02 anoopkg6

@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-systemz

@llvm/pr-subscribers-clang

Author: None (anoopkg6)

Changes

Add support for the flag output operand "=@cc" on SystemZ, and optimize conditional branches for the 14 possible combinations of the CC mask.


Patch is 616.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125970.diff

21 Files Affected:

  • (modified) clang/lib/Basic/Targets/SystemZ.cpp (+11)
  • (modified) clang/lib/Basic/Targets/SystemZ.h (+5)
  • (modified) clang/lib/CodeGen/CGStmt.cpp (+8-2)
  • (added) clang/test/CodeGen/inline-asm-systemz-flag-output.c (+149)
  • (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3)
  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+61-9)
  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4)
  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+598-2)
  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.h (+14)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand.ll (+500)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_eq_noteq.ll (+939)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_not.ll (+779)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed.ll (+2427)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_eq_noteq.ll (+5248)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_not.ll (+2543)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor.ll (+1047)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_eq_noteq.ll (+854)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_not.ll (+806)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor.ll (+784)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_eq_noteq.ll (+1083)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_not.ll (+778)
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd475..49f88b45220d0c4 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -90,6 +90,14 @@ bool SystemZTargetInfo::validateAsmConstraint(
   case 'T': // Likewise, plus an index
     Info.setAllowsMemory();
     return true;
+  case '@':
+    // CC condition changes.
+    if (strlen(Name) >= 3 && *(Name + 1) == 'c' && *(Name + 2) == 'c') {
+      Name += 2;
+      Info.setAllowsRegister();
+      return true;
+    }
+    return false;
   }
 }
 
@@ -150,6 +158,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
 
 void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
+  // Inline assembly supports SystemZ flag outputs.
+  Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
+
   Builder.defineMacro("__s390__");
   Builder.defineMacro("__s390x__");
   Builder.defineMacro("__zarch__");
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index ef9a07033a6e4ff..a6909ababdec001 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -118,6 +118,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
                              TargetInfo::ConstraintInfo &info) const override;
 
   std::string convertConstraint(const char *&Constraint) const override {
+    if (strncmp(Constraint, "@cc", 3) == 0) {
+      std::string Converted = "{" + std::string(Constraint, 3) + "}";
+      Constraint += 3;
+      return Converted;
+    }
     switch (Constraint[0]) {
     case 'p': // Keep 'p' constraint.
       return std::string("p");
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 41dc91c578c800a..27f7bb652895839 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2563,9 +2563,15 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
       // Target must guarantee the Value `Tmp` here is lowered to a boolean
       // value.
-      llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
+      unsigned CCUpperBound = 2;
+      if (CGF.getTarget().getTriple().getArch() == llvm::Triple::systemz) {
+        // On this target CC value can be in range [0, 3].
+        CCUpperBound = 4;
+      }
+      llvm::Constant *CCUpperBoundConst =
+          llvm::ConstantInt::get(Tmp->getType(), CCUpperBound);
       llvm::Value *IsBooleanValue =
-          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
+          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, CCUpperBoundConst);
       llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
       Builder.CreateCall(FnAssume, IsBooleanValue);
     }
diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
new file mode 100644
index 000000000000000..ab90e031df1f2b8
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
@@ -0,0 +1,149 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: define dso_local signext i32 @foo_012(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 2
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_012(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 2 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_013(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_013(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_023(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_023(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 2 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_123(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META5:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 1
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_123(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 1 || cc == 2 || cc == 3 ? 42 : 0;
+}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e0b638201a04740..cb136fe2f446b43 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5071,6 +5071,9 @@ class TargetLowering : public TargetLoweringBase {
                                             std::vector<SDValue> &Ops,
                                             SelectionDAG &DAG) const;
 
+  // Lower switch statement for flag output operand with SRL/IPM Sequence.
+  virtual bool canLowerSRL_IPM_Switch(SDValue Cond) const;
+
   // Lower custom output constraints. If invalid, return SDValue().
   virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
                                               const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b046aa25f54440..a32787bc882f175 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2831,8 +2831,37 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
       Opcode = Instruction::And;
     else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
       Opcode = Instruction::Or;
-
-    if (Opcode &&
+    auto &TLI = DAG.getTargetLoweringInfo();
+    bool BrSrlIPM = FuncInfo.MF->getTarget().getTargetTriple().getArch() ==
+                    Triple::ArchType::systemz;
+    // For the flag output operand SRL/IPM sequence, we want to avoid
+    // creating a switch case, as it creates a basic block and inhibits
+    // optimization in DAGCombiner for flag output operands.
+    const auto checkSRLIPM = [&TLI](const SDValue &Op) {
+      if (!Op.getNumOperands())
+        return false;
+      SDValue OpVal = Op.getOperand(0);
+      SDNode *N = OpVal.getNode();
+      if (N && N->getOpcode() == ISD::SRL)
+        return TLI.canLowerSRL_IPM_Switch(OpVal);
+      else if (N && OpVal.getNumOperands() &&
+               (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR)) {
+        SDValue OpVal1 = OpVal.getOperand(0);
+        SDNode *N1 = OpVal1.getNode();
+        if (N1 && N1->getOpcode() == ISD::SRL)
+          return TLI.canLowerSRL_IPM_Switch(OpVal1);
+      }
+      return false;
+    };
+    if (BrSrlIPM) {
+      if (NodeMap.count(BOp0) && NodeMap[BOp0].getNode()) {
+        BrSrlIPM &= checkSRLIPM(getValue(BOp0));
+        if (NodeMap.count(BOp1) && NodeMap[BOp1].getNode())
+          BrSrlIPM &= checkSRLIPM(getValue(BOp1));
+      } else
+        BrSrlIPM = false;
+    }
+    if (Opcode && !BrSrlIPM &&
         !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
           match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
         !shouldKeepJumpConditionsTogether(
@@ -12043,18 +12072,41 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
       const APInt &SmallValue = Small.Low->getValue();
       const APInt &BigValue = Big.Low->getValue();
 
+      // The switch-case optimizing transformation inhibits DAGCombiner on
+      // SystemZ for flag output operands. DAGCombiner computes a cumulative
+      // CCMask for the flag output operand SRL/IPM sequence, so we want to
+      // avoid creating a switch case here: it creates a basic block and
+      // inhibits optimization in DAGCombiner for flag output operands.
+      // This affects cases like (CC == 0) || (CC == 2) || (CC == 3), or
+      // (CC == 0) || (CC == 1) ^ (CC == 3); there could potentially be
+      // more cases like this.
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      bool IsSrlIPM = false;
+      if (NodeMap.count(Cond) && NodeMap[Cond].getNode())
+        IsSrlIPM = CurMF->getTarget().getTargetTriple().getArch() ==
+                       Triple::ArchType::systemz &&
+                   TLI.canLowerSRL_IPM_Switch(getValue(Cond));
       // Check that there is only one bit different.
       APInt CommonBit = BigValue ^ SmallValue;
-      if (CommonBit.isPowerOf2()) {
+      if (CommonBit.isPowerOf2() || IsSrlIPM) {
         SDValue CondLHS = getValue(Cond);
         EVT VT = CondLHS.getValueType();
         SDLoc DL = getCurSDLoc();
-
-        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
-                                 DAG.getConstant(CommonBit, DL, VT));
-        SDValue Cond = DAG.getSetCC(
-            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
-            ISD::SETEQ);
+        SDValue Cond;
+
+        if (CommonBit.isPowerOf2()) {
+          SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                   DAG.getConstant(CommonBit, DL, VT));
+          Cond = DAG.getSetCC(DL, MVT::i1, Or,
+                              DAG.getConstant(BigValue | SmallValue, DL, VT),
+                              ISD::SETEQ);
+        } else if (IsSrlIPM && BigValue == 3 && SmallValue == 0) {
+          SDValue SetCC =
+              DAG.getSetCC(DL, MVT::i32, CondLHS,
+                           DAG.getConstant(SmallValue, DL, VT), ISD::SETEQ);
+          Cond = DAG.getSetCC(DL, MVT::i32, SetCC,
+                              DAG.getConstant(BigValue, DL, VT), ISD::SETEQ);
+        }
 
         // Update successor info.
         // Both Small and Big will jump to Small.BB, so we sum up the
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8287565336b54d1..3d48adac509cb9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5563,6 +5563,10 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   return nullptr;
 }
 
+bool TargetLowering::canLowerSRL_IPM_Switch(SDValue Cond) const {
+  return false;
+}
+
 SDValue TargetLowering::LowerAsmOutputForConstraint(
     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3999b54de81b657..259da48a3b22321 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1207,6 +1207,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
     default:
       break;
     }
+  } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+    if (StringRef("{@cc}").compare(Constraint) == 0)
+      return C_Other;
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -1389,6 +1392,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
       return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                  SystemZMC::VR128Regs, 32);
     }
+    if (Constraint[1] == '@') {
+      if (StringRef("{@cc}").compare(Constraint) == 0)
+        return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+    }
   }
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
@@ -1421,6 +1428,35 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
   return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
 }
 
+// Lower @cc targets via setcc.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+    return SDValue();
+
+  // Check that return type is valid.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Glue output operand is of invalid type");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(SystemZ::CC);
+
+  if (Glue.getNode()) {
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+    Chain = Glue.getValue(1);
+  } else
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+
+  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+  SDValue CC = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+                           DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+
+  return CC;
+}
+
 void SystemZTargetLowering::LowerAsmOperandForConstraint(
     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
     SelectionDAG &DAG) const {
@@ -2485,6 +2521,21 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
 #undef CONV
 }
 
+static unsigned CCMaskForSystemZCCVal(unsigned CC) {
+  switch (CC) {
+  default:
+    llvm_unreachable("invalid integer condition!");
+  case 0:
+    return SystemZ::CCMASK_CMP_EQ;
+  case 1:
+    return SystemZ::CCMASK_CMP_LT;
+  case 2:
+    return SystemZ::CCMASK_CMP_GT;
+  case 3:
+    return SystemZ::CCMASK_CMP_UO;
+  }
+}
+
 // If C can be converted to a comparison against zero, ...
[truncated]

llvmbot avatar Feb 06 '25 00:02 llvmbot

@llvm/pr-subscribers-clang-codegen

Author: None (anoopkg6)

Changes

Add support for the flag output operand "=@cc" on SystemZ, and optimize conditional branches for the 14 possible combinations of the CC mask.


Patch is 616.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125970.diff

21 Files Affected:

  • (modified) clang/lib/Basic/Targets/SystemZ.cpp (+11)
  • (modified) clang/lib/Basic/Targets/SystemZ.h (+5)
  • (modified) clang/lib/CodeGen/CGStmt.cpp (+8-2)
  • (added) clang/test/CodeGen/inline-asm-systemz-flag-output.c (+149)
  • (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3)
  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+61-9)
  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4)
  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+598-2)
  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.h (+14)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand.ll (+500)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_eq_noteq.ll (+939)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_not.ll (+779)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed.ll (+2427)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_eq_noteq.ll (+5248)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_not.ll (+2543)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor.ll (+1047)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_eq_noteq.ll (+854)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_not.ll (+806)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor.ll (+784)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_eq_noteq.ll (+1083)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_not.ll (+778)
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd475..49f88b45220d0c4 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -90,6 +90,14 @@ bool SystemZTargetInfo::validateAsmConstraint(
   case 'T': // Likewise, plus an index
     Info.setAllowsMemory();
     return true;
+  case '@':
+    // CC condition changes.
+    if (strlen(Name) >= 3 && *(Name + 1) == 'c' && *(Name + 2) == 'c') {
+      Name += 2;
+      Info.setAllowsRegister();
+      return true;
+    }
+    return false;
   }
 }
 
@@ -150,6 +158,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
 
 void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
+  // Inline assembly supports SystemZ flag outputs.
+  Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
+
   Builder.defineMacro("__s390__");
   Builder.defineMacro("__s390x__");
   Builder.defineMacro("__zarch__");
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index ef9a07033a6e4ff..a6909ababdec001 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -118,6 +118,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
                              TargetInfo::ConstraintInfo &info) const override;
 
   std::string convertConstraint(const char *&Constraint) const override {
+    if (strncmp(Constraint, "@cc", 3) == 0) {
+      std::string Converted = "{" + std::string(Constraint, 3) + "}";
+      Constraint += 3;
+      return Converted;
+    }
     switch (Constraint[0]) {
     case 'p': // Keep 'p' constraint.
       return std::string("p");
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 41dc91c578c800a..27f7bb652895839 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2563,9 +2563,15 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
       // Target must guarantee the Value `Tmp` here is lowered to a boolean
       // value.
-      llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
+      unsigned CCUpperBound = 2;
+      if (CGF.getTarget().getTriple().getArch() == llvm::Triple::systemz) {
+        // On this target CC value can be in range [0, 3].
+        CCUpperBound = 4;
+      }
+      llvm::Constant *CCUpperBoundConst =
+          llvm::ConstantInt::get(Tmp->getType(), CCUpperBound);
       llvm::Value *IsBooleanValue =
-          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
+          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, CCUpperBoundConst);
       llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
       Builder.CreateCall(FnAssume, IsBooleanValue);
     }
diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
new file mode 100644
index 000000000000000..ab90e031df1f2b8
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
@@ -0,0 +1,149 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: define dso_local signext i32 @foo_012(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 2
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_012(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 2 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_013(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_013(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_023(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_023(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 2 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_123(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META5:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 1
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_123(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 1 || cc == 2 || cc == 3 ? 42 : 0;
+}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e0b638201a04740..cb136fe2f446b43 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5071,6 +5071,9 @@ class TargetLowering : public TargetLoweringBase {
                                             std::vector<SDValue> &Ops,
                                             SelectionDAG &DAG) const;
 
+  // Lower switch statement for flag output operand with SRL/IPM Sequence.
+  virtual bool canLowerSRL_IPM_Switch(SDValue Cond) const;
+
   // Lower custom output constraints. If invalid, return SDValue().
   virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
                                               const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b046aa25f54440..a32787bc882f175 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2831,8 +2831,37 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
       Opcode = Instruction::And;
     else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
       Opcode = Instruction::Or;
-
-    if (Opcode &&
+    auto &TLI = DAG.getTargetLoweringInfo();
+    bool BrSrlIPM = FuncInfo.MF->getTarget().getTargetTriple().getArch() ==
+                    Triple::ArchType::systemz;
+    // For Flag output operands SRL/IPM sequence, we want to avoid
+    // creating switch case, as it creates Basic Block and inhibits
+    // optimization in DAGCombiner for flag output operands.
+    const auto checkSRLIPM = [&TLI](const SDValue &Op) {
+      if (!Op.getNumOperands())
+        return false;
+      SDValue OpVal = Op.getOperand(0);
+      SDNode *N = OpVal.getNode();
+      if (N && N->getOpcode() == ISD::SRL)
+        return TLI.canLowerSRL_IPM_Switch(OpVal);
+      else if (N && OpVal.getNumOperands() &&
+               (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR)) {
+        SDValue OpVal1 = OpVal.getOperand(0);
+        SDNode *N1 = OpVal1.getNode();
+        if (N1 && N1->getOpcode() == ISD::SRL)
+          return TLI.canLowerSRL_IPM_Switch(OpVal1);
+      }
+      return false;
+    };
+    if (BrSrlIPM) {
+      if (NodeMap.count(BOp0) && NodeMap[BOp0].getNode()) {
+        BrSrlIPM &= checkSRLIPM(getValue(BOp0));
+        if (NodeMap.count(BOp1) && NodeMap[BOp1].getNode())
+          BrSrlIPM &= checkSRLIPM(getValue(BOp1));
+      } else
+        BrSrlIPM = false;
+    }
+    if (Opcode && !BrSrlIPM &&
         !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
           match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
         !shouldKeepJumpConditionsTogether(
@@ -12043,18 +12072,41 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
       const APInt &SmallValue = Small.Low->getValue();
       const APInt &BigValue = Big.Low->getValue();
 
+      // Creating switch cases optimizing transformation inhibits DAGCombiner
+      // for SystemZ for flag output operands. DAGCombiner computes cumulative
+      // CCMask for flag output operands SRL/IPM sequence, we want to avoid
+      // creating switch case, as it creates Basic Block and inhibits
+      // optimization in DAGCombiner for flag output operands.
+      // cases like (CC == 0) || (CC == 2) || (CC == 3), or
+      // (CC == 0) || (CC == 1) ^ (CC == 3), there could potentially be
+      // more cases like this.
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      bool IsSrlIPM = false;
+      if (NodeMap.count(Cond) && NodeMap[Cond].getNode())
+        IsSrlIPM = CurMF->getTarget().getTargetTriple().getArch() ==
+                       Triple::ArchType::systemz &&
+                   TLI.canLowerSRL_IPM_Switch(getValue(Cond));
       // Check that there is only one bit different.
       APInt CommonBit = BigValue ^ SmallValue;
-      if (CommonBit.isPowerOf2()) {
+      if (CommonBit.isPowerOf2() || IsSrlIPM) {
         SDValue CondLHS = getValue(Cond);
         EVT VT = CondLHS.getValueType();
         SDLoc DL = getCurSDLoc();
-
-        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
-                                 DAG.getConstant(CommonBit, DL, VT));
-        SDValue Cond = DAG.getSetCC(
-            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
-            ISD::SETEQ);
+        SDValue Cond;
+
+        if (CommonBit.isPowerOf2()) {
+          SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                   DAG.getConstant(CommonBit, DL, VT));
+          Cond = DAG.getSetCC(DL, MVT::i1, Or,
+                              DAG.getConstant(BigValue | SmallValue, DL, VT),
+                              ISD::SETEQ);
+        } else if (IsSrlIPM && BigValue == 3 && SmallValue == 0) {
+          SDValue SetCC =
+              DAG.getSetCC(DL, MVT::i32, CondLHS,
+                           DAG.getConstant(SmallValue, DL, VT), ISD::SETEQ);
+          Cond = DAG.getSetCC(DL, MVT::i32, SetCC,
+                              DAG.getConstant(BigValue, DL, VT), ISD::SETEQ);
+        }
 
         // Update successor info.
         // Both Small and Big will jump to Small.BB, so we sum up the
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8287565336b54d1..3d48adac509cb9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5563,6 +5563,10 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   return nullptr;
 }
 
+bool TargetLowering::canLowerSRL_IPM_Switch(SDValue Cond) const {
+  return false;
+}
+
 SDValue TargetLowering::LowerAsmOutputForConstraint(
     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3999b54de81b657..259da48a3b22321 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1207,6 +1207,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
     default:
       break;
     }
+  } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+    if (StringRef("{@cc}").compare(Constraint) == 0)
+      return C_Other;
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -1389,6 +1392,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
       return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                  SystemZMC::VR128Regs, 32);
     }
+    if (Constraint[1] == '@') {
+      if (StringRef("{@cc}").compare(Constraint) == 0)
+        return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+    }
   }
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
@@ -1421,6 +1428,35 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
   return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
 }
 
+// Lower @cc flag outputs: read CC via IPM, then shift right by IPM_CC.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+    return SDValue();
+
+  // Check that return type is valid.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Glue output operand is of invalid type");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(SystemZ::CC);
+
+  if (Glue.getNode()) {
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+    Chain = Glue.getValue(1);
+  } else
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+
+  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+  SDValue CC = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+                           DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+
+  return CC;
+}
+
 void SystemZTargetLowering::LowerAsmOperandForConstraint(
     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
     SelectionDAG &DAG) const {
@@ -2485,6 +2521,21 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
 #undef CONV
 }
 
+static unsigned CCMaskForSystemZCCVal(unsigned CC) {
+  switch (CC) {
+  default:
+    llvm_unreachable("invalid integer condition!");
+  case 0:
+    return SystemZ::CCMASK_CMP_EQ;
+  case 1:
+    return SystemZ::CCMASK_CMP_LT;
+  case 2:
+    return SystemZ::CCMASK_CMP_GT;
+  case 3:
+    return SystemZ::CCMASK_CMP_UO;
+  }
+}
+
 // If C can be converted to a comparison against zero, ...
[truncated]

llvmbot avatar Feb 06 '25 00:02 llvmbot

@llvm/pr-subscribers-llvm-selectiondag

Author: None (anoopkg6)

Changes

Add support for the flag output operand "=@cc" on SystemZ, and optimize conditional branches for the 14 possible combinations of CC mask.


Patch is 616.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125970.diff

21 Files Affected:

  • (modified) clang/lib/Basic/Targets/SystemZ.cpp (+11)
  • (modified) clang/lib/Basic/Targets/SystemZ.h (+5)
  • (modified) clang/lib/CodeGen/CGStmt.cpp (+8-2)
  • (added) clang/test/CodeGen/inline-asm-systemz-flag-output.c (+149)
  • (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3)
  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+61-9)
  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4)
  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+598-2)
  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.h (+14)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand.ll (+500)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_eq_noteq.ll (+939)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_not.ll (+779)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed.ll (+2427)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_eq_noteq.ll (+5248)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_not.ll (+2543)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor.ll (+1047)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_eq_noteq.ll (+854)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_not.ll (+806)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor.ll (+784)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_eq_noteq.ll (+1083)
  • (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_not.ll (+778)
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd475..49f88b45220d0c4 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -90,6 +90,14 @@ bool SystemZTargetInfo::validateAsmConstraint(
   case 'T': // Likewise, plus an index
     Info.setAllowsMemory();
     return true;
+  case '@':
+    // CC condition changes.
+    if (strlen(Name) >= 3 && *(Name + 1) == 'c' && *(Name + 2) == 'c') {
+      Name += 2;
+      Info.setAllowsRegister();
+      return true;
+    }
+    return false;
   }
 }
 
@@ -150,6 +158,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
 
 void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
+  // Inline assembly supports SystemZ flag outputs.
+  Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
+
   Builder.defineMacro("__s390__");
   Builder.defineMacro("__s390x__");
   Builder.defineMacro("__zarch__");
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index ef9a07033a6e4ff..a6909ababdec001 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -118,6 +118,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
                              TargetInfo::ConstraintInfo &info) const override;
 
   std::string convertConstraint(const char *&Constraint) const override {
+    if (strncmp(Constraint, "@cc", 3) == 0) {
+      std::string Converted = "{" + std::string(Constraint, 3) + "}";
+      Constraint += 3;
+      return Converted;
+    }
     switch (Constraint[0]) {
     case 'p': // Keep 'p' constraint.
       return std::string("p");
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 41dc91c578c800a..27f7bb652895839 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2563,9 +2563,15 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
       // Target must guarantee the Value `Tmp` here is lowered to a boolean
       // value.
-      llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
+      unsigned CCUpperBound = 2;
+      if (CGF.getTarget().getTriple().getArch() == llvm::Triple::systemz) {
+        // On this target CC value can be in range [0, 3].
+        CCUpperBound = 4;
+      }
+      llvm::Constant *CCUpperBoundConst =
+          llvm::ConstantInt::get(Tmp->getType(), CCUpperBound);
       llvm::Value *IsBooleanValue =
-          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
+          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, CCUpperBoundConst);
       llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
       Builder.CreateCall(FnAssume, IsBooleanValue);
     }
diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
new file mode 100644
index 000000000000000..ab90e031df1f2b8
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
@@ -0,0 +1,149 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: define dso_local signext i32 @foo_012(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 2
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_012(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 2 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_013(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_013(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_023(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_023(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 2 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_123(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META5:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 1
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_123(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 1 || cc == 2 || cc == 3 ? 42 : 0;
+}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e0b638201a04740..cb136fe2f446b43 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5071,6 +5071,9 @@ class TargetLowering : public TargetLoweringBase {
                                             std::vector<SDValue> &Ops,
                                             SelectionDAG &DAG) const;
 
+  // Lower switch statement for flag output operand with SRL/IPM Sequence.
+  virtual bool canLowerSRL_IPM_Switch(SDValue Cond) const;
+
   // Lower custom output constraints. If invalid, return SDValue().
   virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
                                               const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b046aa25f54440..a32787bc882f175 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2831,8 +2831,37 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
       Opcode = Instruction::And;
     else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
       Opcode = Instruction::Or;
-
-    if (Opcode &&
+    auto &TLI = DAG.getTargetLoweringInfo();
+    bool BrSrlIPM = FuncInfo.MF->getTarget().getTargetTriple().getArch() ==
+                    Triple::ArchType::systemz;
+    // For Flag output operands SRL/IPM sequence, we want to avoid
+    // creating switch case, as it creates Basic Block and inhibits
+    // optimization in DAGCombiner for flag output operands.
+    const auto checkSRLIPM = [&TLI](const SDValue &Op) {
+      if (!Op.getNumOperands())
+        return false;
+      SDValue OpVal = Op.getOperand(0);
+      SDNode *N = OpVal.getNode();
+      if (N && N->getOpcode() == ISD::SRL)
+        return TLI.canLowerSRL_IPM_Switch(OpVal);
+      else if (N && OpVal.getNumOperands() &&
+               (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR)) {
+        SDValue OpVal1 = OpVal.getOperand(0);
+        SDNode *N1 = OpVal1.getNode();
+        if (N1 && N1->getOpcode() == ISD::SRL)
+          return TLI.canLowerSRL_IPM_Switch(OpVal1);
+      }
+      return false;
+    };
+    if (BrSrlIPM) {
+      if (NodeMap.count(BOp0) && NodeMap[BOp0].getNode()) {
+        BrSrlIPM &= checkSRLIPM(getValue(BOp0));
+        if (NodeMap.count(BOp1) && NodeMap[BOp1].getNode())
+          BrSrlIPM &= checkSRLIPM(getValue(BOp1));
+      } else
+        BrSrlIPM = false;
+    }
+    if (Opcode && !BrSrlIPM &&
         !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
           match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
         !shouldKeepJumpConditionsTogether(
@@ -12043,18 +12072,41 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
       const APInt &SmallValue = Small.Low->getValue();
       const APInt &BigValue = Big.Low->getValue();
 
+      // The switch-case optimizing transformation inhibits DAGCombiner for
+      // SystemZ flag output operands. DAGCombiner computes a cumulative
+      // CCMask for the flag output operand SRL/IPM sequence, so we want to
+      // avoid creating switch cases here: doing so creates basic blocks and
+      // inhibits that optimization in DAGCombiner.
+      // This matters for cases like (CC == 0) || (CC == 2) || (CC == 3), or
+      // (CC == 0) || (CC == 1) ^ (CC == 3); there could potentially be
+      // more cases like this.
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      bool IsSrlIPM = false;
+      if (NodeMap.count(Cond) && NodeMap[Cond].getNode())
+        IsSrlIPM = CurMF->getTarget().getTargetTriple().getArch() ==
+                       Triple::ArchType::systemz &&
+                   TLI.canLowerSRL_IPM_Switch(getValue(Cond));
       // Check that there is only one bit different.
       APInt CommonBit = BigValue ^ SmallValue;
-      if (CommonBit.isPowerOf2()) {
+      if (CommonBit.isPowerOf2() || IsSrlIPM) {
         SDValue CondLHS = getValue(Cond);
         EVT VT = CondLHS.getValueType();
         SDLoc DL = getCurSDLoc();
-
-        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
-                                 DAG.getConstant(CommonBit, DL, VT));
-        SDValue Cond = DAG.getSetCC(
-            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
-            ISD::SETEQ);
+        SDValue Cond;
+
+        if (CommonBit.isPowerOf2()) {
+          SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                   DAG.getConstant(CommonBit, DL, VT));
+          Cond = DAG.getSetCC(DL, MVT::i1, Or,
+                              DAG.getConstant(BigValue | SmallValue, DL, VT),
+                              ISD::SETEQ);
+        } else if (IsSrlIPM && BigValue == 3 && SmallValue == 0) {
+          SDValue SetCC =
+              DAG.getSetCC(DL, MVT::i32, CondLHS,
+                           DAG.getConstant(SmallValue, DL, VT), ISD::SETEQ);
+          Cond = DAG.getSetCC(DL, MVT::i32, SetCC,
+                              DAG.getConstant(BigValue, DL, VT), ISD::SETEQ);
+        }
 
         // Update successor info.
         // Both Small and Big will jump to Small.BB, so we sum up the
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8287565336b54d1..3d48adac509cb9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5563,6 +5563,10 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   return nullptr;
 }
 
+bool TargetLowering::canLowerSRL_IPM_Switch(SDValue Cond) const {
+  return false;
+}
+
 SDValue TargetLowering::LowerAsmOutputForConstraint(
     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3999b54de81b657..259da48a3b22321 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1207,6 +1207,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
     default:
       break;
     }
+  } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+    if (StringRef("{@cc}").compare(Constraint) == 0)
+      return C_Other;
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -1389,6 +1392,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
       return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                  SystemZMC::VR128Regs, 32);
     }
+    if (Constraint[1] == '@') {
+      if (StringRef("{@cc}").compare(Constraint) == 0)
+        return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+    }
   }
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
@@ -1421,6 +1428,35 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
   return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
 }
 
+// Lower @cc targets via setcc.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+    return SDValue();
+
+  // Check that return type is valid.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Glue output operand is of invalid type");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(SystemZ::CC);
+
+  if (Glue.getNode()) {
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+    Chain = Glue.getValue(1);
+  } else
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+
+  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+  SDValue CC = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+                           DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+
+  return CC;
+}
+
 void SystemZTargetLowering::LowerAsmOperandForConstraint(
     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
     SelectionDAG &DAG) const {
@@ -2485,6 +2521,21 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
 #undef CONV
 }
 
+static unsigned CCMaskForSystemZCCVal(unsigned CC) {
+  switch (CC) {
+  default:
+    llvm_unreachable("invalid integer condition!");
+  case 0:
+    return SystemZ::CCMASK_CMP_EQ;
+  case 1:
+    return SystemZ::CCMASK_CMP_LT;
+  case 2:
+    return SystemZ::CCMASK_CMP_GT;
+  case 3:
+    return SystemZ::CCMASK_CMP_UO;
+  }
+}
+
 // If C can be converted to a comparison against zero, ...
[truncated]

llvmbot avatar Feb 06 '25 00:02 llvmbot

[like] Anoop Kumar reacted to your message:


From: Ulrich Weigand @.> Sent: Friday, April 25, 2025 2:30:52 PM To: llvm/llvm-project @.> Cc: Anoop Kumar @.>; Author @.> Subject: [EXTERNAL] Re: [llvm/llvm-project] Add support for flag output operand @.***" for SystemZ. (PR #125970)

@ uweigand commented on this pull request. Not a full review, just some initial comments on combineCCMask. I think it would be good to have more comments explaining the specific transformations you're attempting to implement, with an argument

@uweigand commented on this pull request.

Not a full review, just some initial comments on combineCCMask. I think it would be good to have more comments explaining the specific transformations you're attempting to implement, with an argument why they are correct for all inputs.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060312364 >:

 return false;
  • // Optimize the case where CompareLHS is a SELECT_CCMASK.
  • if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • // Verify that we have an appropriate mask for a EQ or NE comparison.
  • // Optimize (TM (IPM (CC)))

Adding a case to optimize (TM (IPM)) in addition to (ICMP (IPM)) does make sense in general. However, you need to take care that the optimization is correct for all possible inputs to TM, not just the ones that come up in the "good case" you're looking at. That doesn't appear to be the case here.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060314372 >:

 bool Invert = false;
  • if (CCMask == SystemZ::CCMASK_CMP_NE)
  • if (CCMask == SystemZ::CCMASK_TM_SOME_1) Invert = !Invert;

There are four possible CCMask values for TM. It doesn't look like all four are handled correctly. (You can of course bail out if there's some mask value you don't support, but you shouldn't make any silent assumptions.)


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060316349 >:

   Invert = !Invert;
  • else if (CCMask != SystemZ::CCMASK_CMP_EQ)
  • auto *N = CCNode->getOperand(0).getNode();
  • auto Shift = dyn_cast<ConstantSDNode>(CCNode->getOperand(1));

Calling the operand of TM "Shift" is confusing. There's no shift happening here; TM basically performs an "and" operation.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060318970 >:

   return false;
  • // Verify that the ICMP compares against one of select values.
  • auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
  • if (!TrueVal)
  • if (N->getOpcode() == SystemZISD::IPM) {
  •  auto ShiftVal = Shift->getZExtValue();
    
  •  if (ShiftVal == (1 << SystemZ::IPM_CC))
    
  •    CCMask = SystemZ::CCMASK_CMP_GE;
    

Well, what if the second TM operand is anything else? You'll still do the optimization here, which is likely to be incorrect.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060320864 >:

  •    CCMask = SystemZ::CCMASK_CMP_GE;
    
  •  if (Invert)
    
  •    CCMask ^= CCValid;
    
  •  // Return the updated CCReg link.
    
  •  CCReg = N->getOperand(0);
    
  •  return true;
    
  • } else if (N->getOpcode() == ISD::XOR) {
  •  // Optimize (TM (XOR (OP1 OP2))).
    
  •  auto *XOROp1 = N->getOperand(0).getNode();
    
  •  auto *XOROp2 = N->getOperand(1).getNode();
    
  •  if (!XOROp1 || !XOROp2)
    
  •    return false;
    
  •  // OP1. (SELECT_CCMASK (ICMP (SRL (IPM (CC))))).
    
  •  // OP2. (SRL (IPM (CC))).
    
  •  if (XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK /*&&
    
  •      isSRL_IPM_CCSequence(XOROp2)*/) {
    

I don't even fully understand what optimization you're attempting here - but this code completely ignores Op2, which is obviously incorrect.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060325910 >:

  •    int CCValidVal = CCValid1->getZExtValue();
    
  •    int CCMaskVal = CCMask1->getZExtValue();
    
  •    if (combineCCMask(XORReg, CCValidVal, CCMaskVal)) {
    
  •      // CC == 0 || CC == 2 for bit 28 Test Under Mask.
    
  •      CCMask = SystemZ::CCMASK_CMP_GE;
    
  •      CCMask ^= CCMaskVal;
    
  •      if (Invert)
    
  •        CCMask ^= CCValid;
    
  •      CCReg = XORReg;
    
  •      return true;
    
  •    }
    
  •  }
    
  • }
  • }
  • // Optimize (AND (SRL (IPM (CC)))).
  • if (CCNode->getOpcode() == ISD::AND) {

Here it starts to look very confusing. The operand of the combineCCMask routine has to be some Z instruction that sets the condition code - the whole routine is about analysing condition codes set by prior instructions! A plain ISD::AND does not set any Z condition code at all, it simply has a regular (integer) output value - how would it ever be a possible input to combineCCMask? What does this even mean?


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060327291 >:

   return false;
  • if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
  •  Invert = !Invert;
    
  • else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
  • // Bit 28 false (CC == 0) || (CC == 2).
  • // Caller can invert it depending on CCmask there.
  • if (ANDConst->getZExtValue() == 1) {
  •  CCMask = SystemZ::CCMASK_0 | SystemZ::CCMASK_2;
    
  •  CCValid = SystemZ::CCMASK_ANY;
    
  •  return true;
    
  • }
  • return false;
  • }
  • // (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))
  • if (CCNode->getOpcode() == SystemZISD::SELECT_CCMASK) {

The same comment as above - SELECT_CCMASK (while at least a Z specific opcode) does not itself set the condition code (it uses it, of course), and so it cannot be an input to combineCCMask either.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060327821 >:

  • auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
  • if (!NewCCValid || !NewCCMask)
  • int CCValidVal = CCValidNode->getZExtValue();
  • int CCMaskVal = CCMaskNode->getZExtValue();
  • SDValue CCRegOp = CCNode->getOperand(4);
  • if (combineCCMask(CCRegOp, CCValidVal, CCMaskVal)) {
  •  CCMask = CCMaskVal;
    
  •  CCValid = SystemZ::CCMASK_ANY;
    
  •  CCReg = CCRegOp;
    
  •  return true;
    
  • }
  • return false;
  • }
  • // Both oerands of XOR are (SELECT_CCMASK (ICMP (SRL (IPM (CC))))).
  • if (CCNode->getOpcode() == ISD::XOR) {

And once again an ISD::XOR cannot be an input to combineCCMask.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060344904 >:

  • if (!RHS) {
  • SDValue CmpOp1 = CCNode->getOperand(0);
  • SDValue CmpOp2 = CCNode->getOperand(1);
  • auto *CmpNode1 = CmpOp1.getNode(), *CmpNode2 = CmpOp2.getNode();
  • if (!CmpNode1 || !CmpNode2)
  •  return false;
    
  • if (CmpNode1->getOpcode() == SystemZISD::SELECT_CCMASK ||
  •    CmpNode2->getOpcode() == SystemZISD::SELECT_CCMASK) {
    
  •  SDValue CmpOp =
    
  •      CmpNode1->getOpcode() == SystemZISD::SELECT_CCMASK ? CmpOp2 : CmpOp1;
    
  •  SDNode *SelectCC = CmpNode1->getOpcode() == SystemZISD::SELECT_CCMASK
    
  •                         ? CmpNode1
    
  •                         : CmpNode2;
    
  •  int CmpCCValid = CCValid, SelectCCValid = CCValid;
    
  •  int CmpCCMask = CCMask, SelectCCMask = CCMask;
    
  •  bool IsOp1 = combineCCMask(CmpOp, CmpCCValid, CmpCCMask);
    

This calls combineCCMask on some random operation that does not set a condition code - is this why you end up in some of those cases above? That doesn't make sense. What is the actual optimization this code path is supposed to achieve?


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060358072 >:

}

  • int CmpVal = RHS->getZExtValue();
  • // (BR_CC (ICMP (SELECT_CCMASK (CC))))
  • if (LHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • int CCVal = RHS->getZExtValue();
  • int Mask = CCMaskForICmpEQCCVal(CCVal);
  • bool Invert = false;
  • if (CCMask == SystemZ::CCMASK_CMP_NE)
  •  Invert = !Invert;
    
  • SDValue NewCCReg = CCNode->getOperand(0);
  • if (combineCCMask(NewCCReg, CCValid, CCMask)) {

Again a recursive call with an opcode that does not set CC.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060358436 >:

  • if (LHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • int CCVal = RHS->getZExtValue();
  • int Mask = CCMaskForICmpEQCCVal(CCVal);
  • bool Invert = false;
  • if (CCMask == SystemZ::CCMASK_CMP_NE)
  •  Invert = !Invert;
    
  • SDValue NewCCReg = CCNode->getOperand(0);
  • if (combineCCMask(NewCCReg, CCValid, CCMask)) {
  •  CCMask |= Mask;
    
  •  if (Invert)
    
  •    CCMask ^= SystemZ::CCMASK_ANY;
    
  •  CCReg = NewCCReg;
    
  •  CCValid = SystemZ::CCMASK_ANY;
    
  •  return true;
    
  • } else if (CCMask == SystemZ::CCMASK_CMP_NE ||
  •           CCMask != SystemZ::CCMASK_CMP_EQ) {
    

This condition looks incorrect.

— Reply to this email directly, view it on GitHub<https://github.com/llvm/llvm-project/pull/125970#pullrequestreview-2794248049 >, or unsubscribe<https://github.com/notifications/unsubscribe-auth/BM5K4GTSFUQ33NTN6K4DE4D23JBJZAVCNFSM6AAAAABWSJVEQKVHI2DSMVQWIX3LMV43YUDVNRWFEZLROVSXG5CSMV3GSZLXHMZDOOJUGI2DQMBUHE >. You are receiving this because you authored the thread.Message ID: @.***>

anoopkg6 avatar Apr 28 '25 18:04 anoopkg6

Hi Ulrich,

I have taken one example from code review feedback. I will incorporate more changes to code after I understand. I have commented on xor in this example code changes.

This example is for code review comment on line 8879. 88979 +if (LHS->getOpcode() == ISD::XOR) {


test.c

static __attribute__((always_inline)) inline int __atomic_dec_and_test_023(int *ptr) { int cc; asm volatile( " alsi %[ptr],-1\n" : "=@cc" (cc), [ptr] "+QS" (*ptr) : : "memory"); return (cc == 0) ^ (cc == 2) ^ (cc == 3); } int a; long fu_023(void) { if (__atomic_dec_and_test_023(&a)) return 5; return 8; }

Initial input DAG to combineCCMask

Function: fu_023 SelectionDAG has 38 nodes: t0: ch,glue = EntryToken t8: ch,glue = inlineasm t0, TargetExternalSymbol:i64' alsi $1,-1 ', MDNode:ch<0x1b4dec58>, TargetConstant:i64<25>, TargetConstant:i32<458762>, Register:i32 %0, TargetConstant:i32<524302>, t51, TargetConstant:i32<524302>, t51 t10: i32,ch,glue = CopyFromReg t8, Register:i32 $cc, t8:1 t11: i32 = SystemZISD::IPM t10 t13: i32 = srl t11, Constant:i32<28> t47: i32 = SystemZISD::ICMP t13, Constant:i32<3>, TargetConstant:i32<0> t49: i32 = SystemZISD::SELECT_CCMASK Constant:i32<1>, Constant:i32<0>, TargetConstant:i32<14>, TargetConstant:i32<6>, t47 t40: i32 = SystemZISD::ICMP t13, Constant:i32<0>, TargetConstant:i32<0> t44: i32 = SystemZISD::SELECT_CCMASK Constant:i32<1>, Constant:i32<0>, TargetConstant:i32<14>, TargetConstant:i32<8>, t40 t45: i32 = SystemZISD::ICMP t13, Constant:i32<2>, TargetConstant:i32<0> t46: i32 = SystemZISD::SELECT_CCMASK Constant:i32<1>, Constant:i32<0>, TargetConstant:i32<14>, TargetConstant:i32<8>, t45 t32: i32 = xor t44, t46 t34: i32 = xor t49, t32 t52: i32 = SystemZISD::ICMP t34, Constant:i32<0>, TargetConstant:i32<0> t53: i64 = SystemZISD::SELECT_CCMASK Constant:i64<8>, Constant:i64<5>, TargetConstant:i32<14>, TargetConstant:i32<6>, t52 t28: ch,glue = CopyToReg t10:1, Register:i64 $r2d, t53 t51: i64 = SystemZISD::PCREL_WRAPPER TargetGlobalAddress:i64<ptr @a> 0 t29: ch = SystemZISD::RET_GLUE t28, Register:i64 $r2d, t28:1


Above is the Initial Input DAG. t32: i32 = xor t44, t46 t34: i32 = xor t49, t32

The t34 xor has t49 and t32 as operands. It calls combineCCMask recursively on t49 to combine SystemZISD::SELECT_CCMASK with t10: (SELECT_CCMASK (ICMP (SRL (IPM t10)))), effectively looking at the sub-expression (ICMP (SRL (IPM t10))) and checking for isSRL_IPM_CCSequence(). t49's last operand, t47, is replaced with t10 after combining the sequence. Recursion depth = 1 for combineCCMask when combining SELECT_CCMASK t49 with t10. CCMask for t49 = 0b0001.

combineCCMask is then called on t32, which is again an xor, both of whose operands, t44 and t46, are SystemZISD::SELECT_CCMASK. The call on the xor combines the t44 sequence (SELECT_CCMASK (ICMP (SRL (IPM t10)))) with t10, and similarly for t46. t44 and t46 are replaced with t10 after combining. Recursion depth = 2: one level for the call on the t32 xor, which in turn calls combineCCMask for t44 to combine (SELECT_CCMASK (ICMP (SRL (IPM t10)))) with t10, returns, and then calls combineCCMask again for t46 to combine (SELECT_CCMASK (ICMP (SRL (IPM t10)))) with t10, effectively checking isSRL_IPM_CCSequence. CCMask for t44 = 0b1000 and for t46 = 0b0010.

t53's last operand, t52, is combined with t34, which has already been combined with t10. After combining, the whole DAG reduces to a single SELECT_CCMASK on t10 with the cumulative computed CCMask = 0b1011.

Recursion depth is mostly 0 and 1 in a few cases. Recursion depth is 2 only in very special cases like this.


Final output after combineCCMask t53 fourth argument t52 is combine with t10 .

***** in combineSELECT_CCMASK t53: i64 = <<Unknown Node #513>> Constant:i64<8>, Constant:i64<5>, TargetConstant:i32<14>, TargetConstant:i32<6>, t52 --------Anoop combineSELECT_CC_CCIPMMASK Function: fu_023 SelectionDAG has 20 nodes: t0: ch,glue = EntryToken t8: ch,glue = inlineasm t0, TargetExternalSymbol:i64' alsi $1,-1 ', MDNode:ch<0x1b4dec58>, TargetConstant:i64<25>, TargetConstant:i32<458762>, Register:i32 %0, TargetConstant:i32<524302>, t51, TargetConstant:i32<524302>, t51 t10: i32,ch,glue = CopyFromReg t8, Register:i32 $cc, t8:1 t56: i64 = SystemZISD::SELECT_CCMASK Constant:i64<8>, Constant:i64<5>, TargetConstant:i32<15>, TargetConstant:i32<4>, t10 // All 3 select_cc combined t28: ch,glue = CopyToReg t10:1, Register:i64 $r2d, t56 t51: i64 = SystemZISD::PCREL_WRAPPER TargetGlobalAddress:i64<ptr @a> 0 t29: ch = SystemZISD::RET_GLUE t28, Register:i64 $r2d, t28:1

Assembly test.s fu_023: # @fu_023

%bb.0: # %entry

    larl    %r1, a
    #APP
    alsi    0(%r1), -1

    #NO_APP
    lghi    %r2, 8
    blr     %r14

.LBB0_1: # %entry lghi %r2, 5 br %r14 .Lfunc_end0: .size fu_023, .Lfunc_end0-fu_023 # -- End function .type @.*** # @a .section .@.*** .globl a

I am attaching the ISD::XOR code and the git diff for SystemZISelLowering.cpp, with the portion of the xor code relevant to the above example, to illustrate it. This is not for review, as I am still adding comments and some explicit return false statements for DAG input patterns that will not be combined. For the good cases, though, combineCCMask combines the sequence, updates CCReg (to t10 in this example), updates the computed CCMask by reference, and returns true to the caller, where the select_cc DAG node is replaced with one using the updated CCReg (t10) and CCMask. After combining with t10, the srl/ipm sequence gets optimized. Thanks, Anoop


From: Ulrich Weigand @.> Sent: Friday, April 25, 2025 9:30 AM To: llvm/llvm-project @.> Cc: Anoop Kumar @.>; Author @.> Subject: [EXTERNAL] Re: [llvm/llvm-project] Add support for flag output operand @.***" for SystemZ. (PR #125970)

@ uweigand commented on this pull request. Not a full review, just some initial comments on combineCCMask. I think it would be good to have more comments explaining the specific transformations you're attempting to implement, with an argument

@uweigand commented on this pull request.

Not a full review, just some initial comments on combineCCMask. I think it would be good to have more comments explaining the specific transformations you're attempting to implement, with an argument why they are correct for all inputs.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060312364 >:

 return false;
  • // Optimize the case where CompareLHS is a SELECT_CCMASK.
  • if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • // Verify that we have an appropriate mask for a EQ or NE comparison.
  • // Optimize (TM (IPM (CC)))

Adding a case to optimize (TM (IPM)) in addition to (ICMP (IPM)) does make sense in general. However, you need to take care that the optimization is correct for all possible inputs to TM, not just the ones that come up in the "good case" you're looking at. That doesn't appear to be the case here.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060314372 >:

 bool Invert = false;
  • if (CCMask == SystemZ::CCMASK_CMP_NE)
  • if (CCMask == SystemZ::CCMASK_TM_SOME_1) Invert = !Invert;

There are four possible CCMask values for TM. It doesn't look like all four are handled correctly. (You can of course bail out if there's some mask value you don't support, but you shouldn't make any silent assumptions.)


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060316349 >:

   Invert = !Invert;
  • else if (CCMask != SystemZ::CCMASK_CMP_EQ)
  • auto *N = CCNode->getOperand(0).getNode();
  • auto Shift = dyn_cast<ConstantSDNode>(CCNode->getOperand(1));

Calling the operand of TM "Shift" is confusing. There's no shift happening here; TM basically performs an "and" operation.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060318970 >:

   return false;
  • // Verify that the ICMP compares against one of select values.
  • auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
  • if (!TrueVal)
  • if (N->getOpcode() == SystemZISD::IPM) {
  •  auto ShiftVal = Shift->getZExtValue();
    
  •  if (ShiftVal == (1 << SystemZ::IPM_CC))
    
  •    CCMask = SystemZ::CCMASK_CMP_GE;
    

Well, what if the second TM operand is anything else? You'll still do the optimization here, which is likely to be incorrect.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060320864 >:

  •    CCMask = SystemZ::CCMASK_CMP_GE;
    
  •  if (Invert)
    
  •    CCMask ^= CCValid;
    
  •  // Return the updated CCReg link.
    
  •  CCReg = N->getOperand(0);
    
  •  return true;
    
  • } else if (N->getOpcode() == ISD::XOR) {
  •  // Optimize (TM (XOR (OP1 OP2))).
    
  •  auto *XOROp1 = N->getOperand(0).getNode();
    
  •  auto *XOROp2 = N->getOperand(1).getNode();
    
  •  if (!XOROp1 || !XOROp2)
    
  •    return false;
    
  •  // OP1. (SELECT_CCMASK (ICMP (SRL (IPM (CC))))).
    
  •  // OP2. (SRL (IPM (CC))).
    
  •  if (XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK /*&&
    
  •      isSRL_IPM_CCSequence(XOROp2)*/) {
    

I don't even fully understand what optimization you're attempting here - but this code completely ignores Op2, which is obviously incorrect.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060325910 >:

  •    int CCValidVal = CCValid1->getZExtValue();
    
  •    int CCMaskVal = CCMask1->getZExtValue();
    
  •    if (combineCCMask(XORReg, CCValidVal, CCMaskVal)) {
    
  •      // CC == 0 || CC == 2 for bit 28 Test Under Mask.
    
  •      CCMask = SystemZ::CCMASK_CMP_GE;
    
  •      CCMask ^= CCMaskVal;
    
  •      if (Invert)
    
  •        CCMask ^= CCValid;
    
  •      CCReg = XORReg;
    
  •      return true;
    
  •    }
    
  •  }
    
  • }
  • }
  • // Optimize (AND (SRL (IPM (CC)))).
  • if (CCNode->getOpcode() == ISD::AND) {

Here it starts to look very confusing. The operand of the combineCCMask routine has to be some Z instruction that sets the condition code - the whole routine is about analysing condition codes set by prior instructions! A plain ISD::AND does not set any Z condition code at all, it simply has a regular (integer) output value - how would it ever be a possible input to combineCCMask? What does this even mean?


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060327291 >:

   return false;
  • if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
  •  Invert = !Invert;
    
  • else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
  • // Bit 28 false (CC == 0) || (CC == 2).
  • // Caller can invert it depending on CCmask there.
  • if (ANDConst->getZExtValue() == 1) {
  •  CCMask = SystemZ::CCMASK_0 | SystemZ::CCMASK_2;
    
  •  CCValid = SystemZ::CCMASK_ANY;
    
  •  return true;
    
  • }
  • return false;
  • }
  • // (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))
  • if (CCNode->getOpcode() == SystemZISD::SELECT_CCMASK) {

The same comment as above - SELECT_CCMASK (while at least a Z specific opcode) does not itself set the condition code (it uses it, of course), and so it cannot be an input to combineCCMask either.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060327821 >:

  • auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
  • if (!NewCCValid || !NewCCMask)
  • int CCValidVal = CCValidNode->getZExtValue();
  • int CCMaskVal = CCMaskNode->getZExtValue();
  • SDValue CCRegOp = CCNode->getOperand(4);
  • if (combineCCMask(CCRegOp, CCValidVal, CCMaskVal)) {
  •  CCMask = CCMaskVal;
    
  •  CCValid = SystemZ::CCMASK_ANY;
    
  •  CCReg = CCRegOp;
    
  •  return true;
    
  • }
  • return false;
  • }
  • // Both oerands of XOR are (SELECT_CCMASK (ICMP (SRL (IPM (CC))))).
  • if (CCNode->getOpcode() == ISD::XOR) {

And once again an ISD::XOR cannot be an input to combineCCMask.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060344904 >:

  • if (!RHS) {
  • SDValue CmpOp1 = CCNode->getOperand(0);
  • SDValue CmpOp2 = CCNode->getOperand(1);
  • auto *CmpNode1 = CmpOp1.getNode(), *CmpNode2 = CmpOp2.getNode();
  • if (!CmpNode1 || !CmpNode2)
  •  return false;
    
  • if (CmpNode1->getOpcode() == SystemZISD::SELECT_CCMASK ||
  •    CmpNode2->getOpcode() == SystemZISD::SELECT_CCMASK) {
    
  •  SDValue CmpOp =
    
  •      CmpNode1->getOpcode() == SystemZISD::SELECT_CCMASK ? CmpOp2 : CmpOp1;
    
  •  SDNode *SelectCC = CmpNode1->getOpcode() == SystemZISD::SELECT_CCMASK
    
  •                         ? CmpNode1
    
  •                         : CmpNode2;
    
  •  int CmpCCValid = CCValid, SelectCCValid = CCValid;
    
  •  int CmpCCMask = CCMask, SelectCCMask = CCMask;
    
  •  bool IsOp1 = combineCCMask(CmpOp, CmpCCValid, CmpCCMask);
    

This calls combineCCMask on some random operation that does not set a condition code - is this why you end up in some of those cases above? That doesn't make sense. What is the actual optimization this code path is supposed to achieve?


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060358072 >:

}

  • int CmpVal = RHS->getZExtValue();
  • // (BR_CC (ICMP (SELECT_CCMASK (CC))))
  • if (LHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • int CCVal = RHS->getZExtValue();
  • int Mask = CCMaskForICmpEQCCVal(CCVal);
  • bool Invert = false;
  • if (CCMask == SystemZ::CCMASK_CMP_NE)
  •  Invert = !Invert;
    
  • SDValue NewCCReg = CCNode->getOperand(0);
  • if (combineCCMask(NewCCReg, CCValid, CCMask)) {

Again a recursive call with an opcode that does not set CC.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2060358436 >:

  • if (LHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • int CCVal = RHS->getZExtValue();
  • int Mask = CCMaskForICmpEQCCVal(CCVal);
  • bool Invert = false;
  • if (CCMask == SystemZ::CCMASK_CMP_NE)
  •  Invert = !Invert;
    
  • SDValue NewCCReg = CCNode->getOperand(0);
  • if (combineCCMask(NewCCReg, CCValid, CCMask)) {
  •  CCMask |= Mask;
    
  •  if (Invert)
    
  •    CCMask ^= SystemZ::CCMASK_ANY;
    
  •  CCReg = NewCCReg;
    
  •  CCValid = SystemZ::CCMASK_ANY;
    
  •  return true;
    
  • } else if (CCMask == SystemZ::CCMASK_CMP_NE ||
  •           CCMask != SystemZ::CCMASK_CMP_EQ) {
    

This condition looks incorrect.

— Reply to this email directly, view it on GitHub<https://github.com/llvm/llvm-project/pull/125970#pullrequestreview-2794248049 >, or unsubscribe<https://github.com/notifications/unsubscribe-auth/BM5K4GTSFUQ33NTN6K4DE4D23JBJZAVCNFSM6AAAAABWSJVEQKVHI2DSMVQWIX3LMV43YUDVNRWFEZLROVSXG5CSMV3GSZLXHMZDOOJUGI2DQMBUHE >. You are receiving this because you authored the thread.Message ID: @.***>

@@ -8797,14 +8797,21 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { bool Invert = false; if (CCMask == SystemZ::CCMASK_TM_SOME_1) Invert = !Invert;

  • else if (CCMask != SystemZ::CCMASK_TM_ALL_0)
  •  return false;
    
    auto *N = CCNode->getOperand(0).getNode();
  • auto Shift = dyn_cast<ConstantSDNode>(CCNode->getOperand(1));
  • if (!N || !Shift)
  • auto *TMOp1Const = dyn_cast<ConstantSDNode>(CCNode->getOperand(1));
  • auto *TMOp2Const = dyn_cast<ConstantSDNode>(CCNode->getOperand(2));
  • if (!N || !TMOp1Const || !TMOp2Const || TMOp2Const->getZExtValue() != 0) return false;
  • auto TMConstVal = TMOp1Const->getZExtValue(); if (N->getOpcode() == SystemZISD::IPM) {
  •  auto ShiftVal = Shift->getZExtValue();
    
  •  if (ShiftVal == (1 << SystemZ::IPM_CC))
    
  •  if (TMConstVal == (1 << SystemZ::IPM_CC))
       CCMask = SystemZ::CCMASK_CMP_GE;
    
  •  else if (TMConstVal == (1 << (SystemZ::IPM_CC + 1)))
    
  •    CCMask = SystemZ::CCMASK_CMP_LE;
    
  •  else
    
  •    return false;
     if (Invert)
       CCMask ^= CCValid;
     // Return the updated CCReg link.
    

@@ -8818,8 +8825,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { return false; // OP1. (SELECT_CCMASK (ICMP (SRL (IPM (CC))))). // OP2. (SRL (IPM (CC))).

  •  if (XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK /*&&
    
  •      isSRL_IPM_CCSequence(XOROp2)*/) {
    
  •  if (XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK) {
       auto *CCValid1 = dyn_cast<ConstantSDNode>(XOROp1->getOperand(2));
       auto *CCMask1 = dyn_cast<ConstantSDNode>(XOROp1->getOperand(3));
       SDValue XORReg = XOROp1->getOperand(4);
    

@@ -8827,8 +8833,12 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { return false; int CCValidVal = CCValid1->getZExtValue(); int CCMaskVal = CCMask1->getZExtValue();

  •    if (combineCCMask(XORReg, CCValidVal, CCMaskVal)) {
    
  •    // (SELECT_CCMASK (ICMP (SRL (IPM (CC))))).
    
  •    if (combineCCMask(XORReg, CCValidVal, CCMaskVal) &&
    
  •        isSRL_IPM_CCSequence(XOROp2)) {
         // CC == 0 || CC == 2 for bit 28 Test Under Mask.
    
  •      if (TMConstVal != 1)
    
  •        return false;
         CCMask = SystemZ::CCMASK_CMP_GE;
         CCMask ^= CCMaskVal;
         if (Invert)
    

@@ -8840,6 +8850,14 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { } } // Optimize (AND (SRL (IPM (CC)))).

  • // One use case it is being called from combineSELECT_CC_CCIPMMASK where
  • // subtree select_cc operand has already been computed and other operand
  • // and-exp has to evaluated. combineSELECT_CC_CCIPMMASK calls combineCCMask
  • // for and-exp. This is also one of very few cases where ICMP has both
  • // operands non-const. Below has ICMP code where already-computed-select_cc
  • // and and-exp are compared.
  • // (BR_CCMASK (ICMP (already-combined_computed-select_cc_mask and-exp)))
  • // and-exp - (AND (SRL (IPM (CC)))). if (CCNode->getOpcode() == ISD::AND) { auto *N = CCNode->getOperand(0).getNode(); if (!isSRL_IPM_CCSequence(N)) @@ -8848,9 +8866,9 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { if (!ANDConst) return false; // Bit 28 false (CC == 0) || (CC == 2).
  • // Caller can invert it depending on CCmask there.
  • // Caller can invert it depending on CCMask there. if (ANDConst->getZExtValue() == 1) {
  •  CCMask = SystemZ::CCMASK_0 | SystemZ::CCMASK_2;
    
  •  CCMask = SystemZ::CCMASK_CMP_GE;
     CCValid = SystemZ::CCMASK_ANY;
     return true;
    
    } @@ -8866,6 +8884,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { int CCValidVal = CCValidNode->getZExtValue(); int CCMaskVal = CCMaskNode->getZExtValue(); SDValue CCRegOp = CCNode->getOperand(4);
  • // (SELECT_CCMASK (ICMP (SRL (IPM (CC))))). if (combineCCMask(CCRegOp, CCValidVal, CCMaskVal)) { CCMask = CCMaskVal; CCValid = SystemZ::CCMASK_ANY; @@ -8899,7 +8918,9 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { int CCMaskVal2 = CCMask2->getZExtValue(); SDValue CCReg1 = XOROp1->getOperand(4); SDValue CCReg2 = XOROp2->getOperand(4);
  •  // (ICMP (SRL (IPM (CC)))).
     if (!combineCCMask(CCReg1, CCValidVal1, CCMaskVal1) ||
    
  •      // (ICMP (SRL (IPM (CC)))).
         !combineCCMask(CCReg2, CCValidVal2, CCMaskVal2))
       return false;
     CCMask = CCMaskVal1 ^ CCMaskVal2;
    

@@ -8919,8 +8940,10 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { if (!LHS || LHS->getOpcode() == ISD::Constant) return false;

  • // (BR_CC (ICMP (Op1 Op2))), Op1 Op2 will have (SRL (IPM (CC))) sequence.
  • // SystemZ::ICMP second operand is not constant.
  • // (BR_CC (ICMP (Op1 Op2))), SystemZ::ICMP has both operands Op1 and Op2
  • // non-const. One use case:
  • // (BR_CCMASK (ICMP (SELECT_CCMASK (ICMP (SRL (IPM CC))))
  • // already-combined_computed-select_cc_mask))) if (!RHS) { SDValue CmpOp1 = CCNode->getOperand(0); SDValue CmpOp2 = CCNode->getOperand(1); @@ -8936,7 +8959,10 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { : CmpNode2; int CmpCCValid = CCValid, SelectCCValid = CCValid; int CmpCCMask = CCMask, SelectCCMask = CCMask;
  •  // combine (SELECT_CCMASK (ICMP (SRL (IPM CC))))
     bool IsOp1 = combineCCMask(CmpOp, CmpCCValid, CmpCCMask);
    
  •  // subtree SELECT_CCMASK is already combined with CC, has CCMASK already
    
  •  // been computed. Just ceck ISOp1 and IsOp2 refer to same CC.
     bool IsOp2 = isSameCCIPMOp(CmpOp, SelectCC, SelectCCValid, SelectCCMask);
     if (IsOp1 && IsOp2) {
       CCMask = CmpCCMask ^ SelectCCMask;
    

@@ -8948,7 +8974,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { return false; } int CmpVal = RHS->getZExtValue();

  • // (BR_CC (ICMP (SELECT_CCMASK (CC))))
  • // (BR_CC (ICMP (SELECT_CCMASK (ICMP (SRL (IPM CC)))))) if (LHS->getOpcode() == SystemZISD::SELECT_CCMASK) { int CCVal = RHS->getZExtValue(); int Mask = CCMaskForICmpEQCCVal(CCVal); @@ -8956,6 +8982,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { if (CCMask == SystemZ::CCMASK_CMP_NE) Invert = !Invert; SDValue NewCCReg = CCNode->getOperand(0);
  • // (SELECT_CCMASK (ICMP (SRL (IPM CC)))) if (combineCCMask(NewCCReg, CCValid, CCMask)) { CCMask |= Mask; if (Invert) @@ -8964,8 +8991,8 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { CCValid = SystemZ::CCMASK_ANY; return true; } else if (CCMask == SystemZ::CCMASK_CMP_NE ||
  •           CCMask != SystemZ::CCMASK_CMP_EQ) {
    
  •  // Original combineCCMask.
    
  •           CCMask == SystemZ::CCMASK_CMP_EQ) {
    
  •  // Original combineCCMask code before flag output operand.
     // Verify that the ICMP compares against one of select values.
     auto *TrueVal = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
     if (!TrueVal)
    

@@ -8994,7 +9021,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { } return false; }

  • // (BR_CC (ICMP OR ((SRL (IPM (CC))) (SELECT_CCMASK (CC)))))
  • // (BR_CC (ICMP (OR (Op1 Op2)))). if (LHS->getOpcode() == ISD::OR) { bool Invert = false; if (CCMask == SystemZ::CCMASK_CMP_NE) @@ -9009,6 +9036,8 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { if (!IsOp1 && !IsOp2) { return false; }
  •  // Both Op1 and Op2 are non-const.
    
  •  // Op1 and Op2 can be any of the pattern combined in combineCCMask.
     if (IsOp1 && IsOp2) {
       NewCCMask = NewCCMask1 | NewCCMask2;
       bool IsEqualCmpVal = NewCCMask == CmpVal;
    

@@ -9021,9 +9050,11 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { return true; } } else if (isa<ConstantSDNode>(OrOp2)) {

  •  // Op1 is const. Op2 is (SRL (IPM (CC)).
     if (isSRL_IPM_CCSequence(OrOp1.getNode())) {
       auto *OrConst = dyn_cast<ConstantSDNode>(OrOp2);
       int OrConstVal = OrConst->getZExtValue();
    
  •    // %2 = or disjoint i32 %0, -4.
       if (!OrConst || (OrConstVal & 0x3))
         return false;
       // setullt unsigned(-2), mask = 0x1100
    

@@ -9037,7 +9068,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { } return false; }

  • // (BR_CC (ICMP AND ((SRL (IPM (CC))) (SELECT_CCMASK (CC)))))
  • // (BR_CC (ICMP AND (Op1 Op2) if (LHS->getOpcode() == ISD::AND) { bool Invert = false; if (CCMask == SystemZ::CCMASK_CMP_NE) @@ -9047,10 +9078,18 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { int NewCCMask1 = CCMask, NewCCMask2 = CCMask, NewCCMask; int CCValid1 = CCValid, CCValid2 = CCValid; if (!isa<ConstantSDNode>(AndOp1) && !isa<ConstantSDNode>(AndOp2)) {
  •  // (SRL (IPM (CC))).
     bool IsOp1 = combineCCMask(AndOp1, CCValid1, NewCCMask1);
    
  •  // (SELECT_CCMASK (ICMP (SRL (IPM CC)))).
     bool IsOp2 = combineCCMask(AndOp2, CCValid2, NewCCMask2);
    
  •  // Both Op1 and Op2 are const.
     if (!IsOp1 && !IsOp2)
       return false;
    
  •  // Op1 and Op2 can be any of the pattern combined in combineCCMask.
    
  •  // e.g. %2 = or i1 %cmp, %cmp2, %2 = or i1 %xor8, %cmp4, t28,
    
  •  // i32 = or t27, t26 or (%2 = or i1 %or.cond, %cmp3,
    
  •  // %cmp3 = icmp eq i32 %asmresult1,
    
  •  // %or.cond = icmp samesign ult i32 %asmresult1, 2) sequence.
     if (IsOp1 && IsOp2) {
       NewCCMask = NewCCMask1 & NewCCMask2;
       bool IsEqualCmpVal = NewCCMask == CmpVal;
    

@@ -9062,6 +9101,9 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { CCValid = SystemZ::CCMASK_ANY; return true; } else {

  •    // Either Op1 or Op2 is (SRL (IPM (CC))) sequence.
    
  •    // and other Op can be one of any of pattern to be combined
    
  •    // mentioned in some examples above.
       if (IsOp1 && isSRL_IPM_CCSequence(AndOp2.getNode()))
         NewCCMask = NewCCMask1;
       else if (isSRL_IPM_CCSequence(AndOp2.getNode()) && IsOp2)
    

@@ -9133,7 +9175,11 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { bool Invert = false; if (CCMask == SystemZ::CCMASK_CMP_NE) Invert = !Invert;

  • // If both the operands are select_cc.
  • // If both the operands of XOR are
  • // (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))).
  • // It will get ccombined in recursion base case both operands are xor.
  • // t32: i32 = xor t44, t46 where t44 and t46 are select_cc as
  • // (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))). if (combineCCMask(XORReg, CCValid, CCMask)) { CCReg = XORReg; CCValid = SystemZ::CCMASK_ANY; @@ -9141,6 +9187,9 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { } // Handle the case when one of the operand is select_cc and other operand // could be xor again having both operands as select_cc.
  • // t32: i32 = xor t44, t46, where t44 and t46 are select_cc
  • // (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))).
  • // t34: i32 = xor t49, t32, where t49 is select_cc and t32 xor base case. auto *XOROp1 = LHS->getOperand(0).getNode(); auto *XOROp2 = LHS->getOperand(1).getNode(); if (!XOROp1 || !XOROp2) @@ -9158,7 +9207,10 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { SDValue XORReg1 = XOROp->getOperand(4); SDValue XORReg2 = LHS->getOperand(1); int CCMaskVal1 = CCMaskVal, CCMaskVal2 = CCMaskVal;
  •  // (ICMP (SRL (IPM (CC)))).
     if (combineCCMask(XORReg1, CCValidVal, CCMaskVal1) &&
    
  •      // XOR base case t32:  (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))
    
  •      // (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))).
         combineCCMask(XORReg2, CCValidVal, CCMaskVal2)) {
       CCMask = CCMaskVal1 ^ CCMaskVal2;
    
    // Optimize the case where LHS is (ICMP (SRL (IPM))). if (isSRL_IPM_CCSequence(LHS)) { unsigned CCVal = RHS->getZExtValue(); if (convertCCValToCCMask(CCVal)) { CCValid = SystemZ::CCMASK_ANY; return true; } return false; }

// Both oerands of XOR are (SELECT_CCMASK (ICMP (SRL (IPM (CC))))). // t32: i32 = xor t44, t46, where t44 and t46 are select_cc if (CCNode->getOpcode() == ISD::XOR) { if (isa<ConstantSDNode>(CCNode->getOperand(0)) || isa<ConstantSDNode>(CCNode->getOperand(1))) return false; auto *XOROp1 = CCNode->getOperand(0).getNode(); auto *XOROp2 = CCNode->getOperand(1).getNode(); if (!XOROp1 || !XOROp2) return false; // Both Operands are select_cc. if (XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK && XOROp2->getOpcode() == SystemZISD::SELECT_CCMASK) { auto *CCValid1 = dyn_cast<ConstantSDNode>(XOROp1->getOperand(2)); auto *CCMask1 = dyn_cast<ConstantSDNode>(XOROp1->getOperand(3)); auto *CCValid2 = dyn_cast<ConstantSDNode>(XOROp2->getOperand(2)); auto *CCMask2 = dyn_cast<ConstantSDNode>(XOROp2->getOperand(3)); if (!CCValid1 || !CCMask1 || !CCValid2 || !CCMask2) return false; int CCValidVal1 = CCValid1->getZExtValue(); int CCMaskVal1 = CCMask1->getZExtValue(); int CCValidVal2 = CCValid2->getZExtValue(); int CCMaskVal2 = CCMask2->getZExtValue(); SDValue CCReg1 = XOROp1->getOperand(4); SDValue CCReg2 = XOROp2->getOperand(4); // (ICMP (SRL (IPM (CC)))). if (!combineCCMask(CCReg1, CCValidVal1, CCMaskVal1) || // (ICMP (SRL (IPM (CC)))). !combineCCMask(CCReg2, CCValidVal2, CCMaskVal2)) return false; CCMask = CCMaskVal1 ^ CCMaskVal2; CCReg = CCReg1; CCValid = SystemZ::CCMASK_ANY; return true; } return false; }

// Optimize (ICMP (XOR (OP1 OP2))), OP1 or OP2 could be XOR again. // One or both of operands could be (SELECT_CCMASK (ICMP (SRL (IPM (CC))))). if (LHS->getOpcode() == ISD::XOR) { SDValue XORReg = CCReg->getOperand(0); bool Invert = false; if (CCMask == SystemZ::CCMASK_CMP_NE) Invert = !Invert; // If both the operands of XOR are // (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))). // It will get ccombined in recursion base case both operands are xor. // t32: i32 = xor t44, t46 where t44 and t46 are select_cc as // (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))). if (combineCCMask(XORReg, CCValid, CCMask)) { // will be combined in XOR code above. CCReg = XORReg; CCValid = SystemZ::CCMASK_ANY; return true; } // Handle the case when one of the operand is select_cc and other operand // could be xor again having both operands as select_cc. // t32: i32 = xor t44, t46, where t44 and t46 are select_cc // (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))). // t34: i32 = xor t49, t32, where t49 is select_cc and t32 xor base case. auto *XOROp1 = LHS->getOperand(0).getNode(); auto *XOROp2 = LHS->getOperand(1).getNode(); if (!XOROp1 || !XOROp2) return false; if (XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK || XOROp2->getOpcode() == SystemZISD::SELECT_CCMASK) { auto *XOROp = XOROp1->getOpcode() == SystemZISD::SELECT_CCMASK ? XOROp1 : XOROp2; auto *CCMaskNode = dyn_cast<ConstantSDNode>(XOROp->getOperand(3)); auto *CCValidNode = dyn_cast<ConstantSDNode>(XOROp->getOperand(2)); if (!CCValidNode || !CCMaskNode) return false; int CCValidVal = CCValidNode->getZExtValue(); int CCMaskVal = CCMaskNode->getZExtValue(); SDValue XORReg1 = XOROp->getOperand(4); SDValue XORReg2 = LHS->getOperand(1); int CCMaskVal1 = CCMaskVal, CCMaskVal2 = CCMaskVal; // (ICMP (SRL (IPM (CC)))). if (combineCCMask(XORReg1, CCValidVal, CCMaskVal1) && // XOR base case t32: (XOR (SELECT_CCMASK (ICMP (SRL (IPM (CC))))) // (SELECT_CCMASK (ICMP (SRL (IPM (CC)))))). 
combineCCMask(XORReg2, CCValidVal, CCMaskVal2)) { CCMask = CCMaskVal1 ^ CCMaskVal2; CCReg = XORReg1; CCValid = SystemZ::CCMASK_ANY; return true; } } }

anoopkg6 avatar Apr 28 '25 23:04 anoopkg6

> Why restrict this to SELECT_CCMASK? If the transformation is correct for SELECT_CCMASK, it must also be correct for BR_CCMASK.

We combine select_ccmask in combineCCMask when it is called from combineSELECT_CCMASK and combineBR_CCMASK. However, we do not handle the case of combining select_ccmask with br_ccmask, i.e. (select_ccmask (br_ccmask)), or br_ccmask with br_ccmask, i.e. (br_ccmask (br_ccmask)).

> CCNode cannot be SRL - SRL returns a GPR, and CCNode returns a condition code. Therefore this check can never hit.

I think the name CCNode should be changed. It does not return anything; it just checks for the (srl (ipm)) pattern. It is used as follows.

> I don't know what this branch is supposed to optimize, but I notice that within that whole if block, you not once even refer to AndOp1 - so every (and (select_ccmask) XXX) is optimized to the same thing, no matter what XXX is. That obviously cannot be correct.

There are several cases where CC is used directly. Here is one interesting example:

CC == 0 || CC == 2 || CC == 3.

SelectionDAG has 29 nodes:
  t0: ch,glue = EntryToken
  t8: ch,glue = inlineasm t0, TargetExternalSymbol:i64' alsi $1,-1 ', MDNode:ch, TargetConstant:i64<25>, TargetConstant:i32<458762>, Register:i32 %0, TargetConstant:i32<524302>, t42, TargetConstant:i32<524302>, t42
  t10: i32,ch,glue = CopyFromReg t8, Register:i32 $cc, t8:1
  t11: i32 = SystemZISD::IPM t10
  t13: i32 = srl t11, Constant:i32<28>
  t42: i64 = SystemZISD::PCREL_WRAPPER TargetGlobalAddress:i64<ptr @a> 0
  t37: i32 = SystemZISD::ICMP t13, Constant:i32<3>, TargetConstant:i32<0>
  t40: i32 = SystemZISD::SELECT_CCMASK Constant:i32<1>, Constant:i32<0>, TargetConstant:i32<14>, TargetConstant:i32<6>, t37
  t28: i32 = and t40, t13
  t43: i32 = SystemZISD::ICMP t28, Constant:i32<0>, TargetConstant:i32<0>
  t44: ch = SystemZISD::BR_CCMASK t10:1, TargetConstant:i32<14>, TargetConstant:i32<6>, BasicBlock:ch<if.end 0x9f61ff0>, t43
  t25: ch = br t44, BasicBlock:ch<if.then 0x9f61ed0>


From: Ulrich Weigand @.> Sent: Monday, June 30, 2025 6:32 AM To: llvm/llvm-project @.> Cc: Anoop Kumar @.>; Author @.> Subject: [EXTERNAL] Re: [llvm/llvm-project] Add support for flag output operand @.***" for SystemZ. (PR #125970)

@uweigand commented on this pull request. In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp: > + // Optimizing only the case where Op0TrueVal and Op1TrueVal are equal + // and at the same time Op0FalseVal and Op1FalseVal are also equal.

@uweigand commented on this pull request.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2174845321 >:

  • // Optimizing only the case where Op0TrueVal and Op1TrueVal are equal
  • // and at the same time Op0FalseVal and Op1FalseVal are also equal.
  • auto *Op0TrueVal = dyn_cast<ConstantSDNode>(AndOp0->getOperand(0));
  • auto *Op0FalseVal = dyn_cast<ConstantSDNode>(AndOp0->getOperand(1));
  • auto *Op1TrueVal = dyn_cast<ConstantSDNode>(AndOp1->getOperand(0));
  • auto *Op1FalseVal = dyn_cast<ConstantSDNode>(AndOp1->getOperand(1));
  • if (!Op0TrueVal || !Op0FalseVal || !Op1TrueVal || !Op1FalseVal)
  •  return SDValue();
    
  • if (Op0TrueVal->getZExtValue() != Op1TrueVal->getZExtValue() ||
  •    Op0FalseVal->getZExtValue() != Op1FalseVal->getZExtValue())
    
  •  return SDValue();
    
  • // Compute the effective CC mask for select.
  • int Op0CCMaskVal = Op0CCMask->getZExtValue();
  • int Op1CCMaskVal = Op1CCMask->getZExtValue();
  • int CCMask = Op0CCMaskVal & Op1CCMaskVal;

This still isn't correct. Looking at the options for the AND case, we have in the general case:

  1. CC in CCMASK1 and CCMASK2 --> (and TRUEVAL1 TRUEVAL2)
  2. CC in CCMASK1 and not in CCMASK2 --> (and TRUEVAL1 FALSEVAL2)
  3. CC not in CCMASK1 but in CCMASK2 --> (and FALSEVAL1 TRUEVAL2)
  4. CC neither in CCMASK1 nor in CCMASK2 --> (and FALSEVAL1 FALSEVAL2)

Now, even if you assume (and check for) TRUEVAL1 == TRUEVAL2 and FALSEVAL1 == FALSEVAL2, we still have three options:

  1. CC in CCMASK1 and CCMASK2 --> TRUEVAL
  2. CC in one but not both of CCMASK1 and CCMASK2 (cases 2 and 3 above) --> (and TRUEVAL FALSEVAL)
  3. CC neither in CCMASK1 nor in CCMASK2 --> FALSEVAL

In order to get down to two options that can be represented by a single SELECT_CCMASK, you have to impose even more constraints.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2174852626 >:

  •    for (SDUse &SelectBrUse : Icmp->uses()) {
    
  •      auto *SelectBr = SelectBrUse.getUser();
    
  •      if (SelectBr && (SelectBr->getOpcode() == SystemZISD::SELECT_CCMASK ||
    
  •                       SelectBr->getOpcode() == SystemZISD::BR_CCMASK))
    
  •        DCI.AddToWorklist(SelectBr);
    
  •    }
    
  •  }
    
  • }
  • return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
  •                   AndOp0->getOperand(0), AndOp0->getOperand(1),
    
  •                   DAG.getTargetConstant(Op0CCValidVal, SDLoc(N), MVT::i32),
    
  •                   DAG.getTargetConstant(CCMask, SDLoc(N), MVT::i32),
    
  •                   Op0CCReg);
    
  • } else if (AndOp0->getOpcode() == SystemZISD::SELECT_CCMASK) {
  • // AndOp1: (SRL (IPM (CC))).
  • // AndOp2: CC.

I don't know what this branch is supposed to optimize, but I notice that within that whole if block, you not once even refer to AndOp1 - so every (and (select_ccmask) XXX) is optimized to the same thing, no matter what XXX is. That obviously cannot be correct.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2174853599 >:

  • // Optimizing only the case where Op0TrueVal and Op1TrueVal are equal
  • // and at the same time Op0FalseVal and Op1FalseVal are also equal.
  • auto *Op0TrueVal = dyn_cast<ConstantSDNode>(OrOp0->getOperand(0));
  • auto *Op0FalseVal = dyn_cast<ConstantSDNode>(OrOp0->getOperand(1));
  • auto *Op1TrueVal = dyn_cast<ConstantSDNode>(OrOp1->getOperand(0));
  • auto *Op1FalseVal = dyn_cast<ConstantSDNode>(OrOp1->getOperand(1));
  • if (!Op0TrueVal || !Op0FalseVal || !Op1TrueVal || !Op1FalseVal)
  •  return SDValue();
    
  • if (Op0TrueVal->getZExtValue() != Op1TrueVal->getZExtValue() ||
  •    Op0FalseVal->getZExtValue() != Op1FalseVal->getZExtValue())
    
  •  return SDValue();
    
  • // Compute the effective CC mask for select.
  • int Op0CCMaskVal = Op0CCMask->getZExtValue();
  • int Op1CCMaskVal = Op1CCMask->getZExtValue();
  • int CCMask = Op0CCMaskVal | Op1CCMaskVal;

Same as above, this is not correct.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2174854033 >:

  • // Optimizing only the case where Op0TrueVal and Op1TrueVal are equal
  • // and at the same time Op0FalseVal and Op1FalseVal are also equal.
  • auto *Op0TrueVal = dyn_cast<ConstantSDNode>(XorOp0->getOperand(0));
  • auto *Op0FalseVal = dyn_cast<ConstantSDNode>(XorOp0->getOperand(1));
  • auto *Op1TrueVal = dyn_cast<ConstantSDNode>(XorOp1->getOperand(0));
  • auto *Op1FalseVal = dyn_cast<ConstantSDNode>(XorOp1->getOperand(1));
  • if (!Op0TrueVal || !Op0FalseVal || !Op1TrueVal || !Op1FalseVal)
  •  return SDValue();
    
  • if (Op0TrueVal->getZExtValue() != Op1TrueVal->getZExtValue() ||
  •    Op0FalseVal->getZExtValue() != Op1FalseVal->getZExtValue())
    
  •  return SDValue();
    
  • // Compute the effective CC mask for select.
  • int Op0CCMaskVal = Op0CCMask->getZExtValue();
  • int Op1CCMaskVal = Op1CCMask->getZExtValue();
  • int CCMask = Op0CCMaskVal ^ Op1CCMaskVal;

Same here.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2174857498 >:

  • auto *SRLCount = dyn_cast<ConstantSDNode>(N->getOperand(1));
  • if (!SRLCount || SRLCount->getZExtValue() != SystemZ::IPM_CC)
  •  return false;
    
  • auto *IPM = N->getOperand(0).getNode();
  • if (!IPM || IPM->getOpcode() != SystemZISD::IPM)
  •  return false;
    
  • CCReg = IPM->getOperand(0);
  • return true;
  • };
  • auto *CCNode = CCReg.getNode();
  • if (!CCNode)
  • return false;
  • // Check (SRL (IPM)) pattern and update CCReg if true.
  • if (isSRL_IPM_CCSequence(CCNode))

CCNode cannot be SRL - SRL returns a GPR, and CCNode returns a condition code. Therefore this check can never hit.


In llvm/lib/Target/SystemZ/SystemZISelLowering.cpp<https://github.com/llvm/llvm-project/pull/125970#discussion_r2174858284 >:

  • if (!IPM || IPM->getOpcode() != SystemZISD::IPM)
  •  return false;
    
  • CCReg = IPM->getOperand(0);
  • return true;
  • };
  • auto *CCNode = CCReg.getNode();
  • if (!CCNode)
  • return false;
  • // Check (SRL (IPM)) pattern and update CCReg if true.
  • if (isSRL_IPM_CCSequence(CCNode))
  • return true;
  • // Combine CCMASK_TM with select_ccmask.
  • if (CCNode->getOpcode() == SystemZISD::SELECT_CCMASK) {

Why restrict this to SELECT_CCMASK? If the transformation is correct for SELECT_CCMASK, it must also be correct for BR_CCMASK.

— Reply to this email directly, view it on GitHub<https://github.com/llvm/llvm-project/pull/125970#pullrequestreview-2970867663 >, or unsubscribe<https://github.com/notifications/unsubscribe-auth/BM5K4GSUCCTFRKHBDXETY5D3GEN5DAVCNFSM6AAAAABWSJVEQKVHI2DSMVQWIX3LMV43YUDVNRWFEZLROVSXG5CSMV3GSZLXHMZDSNZQHA3DONRWGM >. You are receiving this because you authored the thread.Message ID: @.***>

anoopkg6 avatar Jun 30 '25 13:06 anoopkg6

I am seeing a crash while building the Linux kernel for ARCH=s390 after this change.

# bad: [96da982128bf7b005afa24a8e6e41e5867d30bc4] [sanitizers] COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME to build only unit tests (#161455)
# good: [e8f721e621d85a2670f13307b1b99528cf5e8708] [clang][docs] Update doc and release note for probe instrumentation (#162606)
git bisect start '96da982128bf7b005afa24a8e6e41e5867d30bc4' 'e8f721e621d85a2670f13307b1b99528cf5e8708'
# bad: [da5fb5e964c213d0ec834ad0b560a523a57ce5cc] [ObjCopy][DX] Support for -dump-section flag (#159999)
git bisect bad da5fb5e964c213d0ec834ad0b560a523a57ce5cc
# bad: [69e0fd6d8dea666205fca52265f09b3eb5ee2f3d] [X86] Remove PREFETCHI from PTL (#163196)
git bisect bad 69e0fd6d8dea666205fca52265f09b3eb5ee2f3d
# good: [782dd178fcb3b146dd16792b54c867095b863ccc] [SPIRV] Do not emit @llvm.compiler.used (#162678)
git bisect good 782dd178fcb3b146dd16792b54c867095b863ccc
# good: [4a8dd4998dae8b7d67e416d20a1fa8a9451c64f5] [BOLT][NFC] Fix for a dangling reference UB (#163344)
git bisect good 4a8dd4998dae8b7d67e416d20a1fa8a9451c64f5
# bad: [d7fc7703402184792319f65570ad6a49ffe8cde7] [LLVM][DAGCombiner] Improve simplifyDivRem's effectiveness after type legalisation. (#162706)
git bisect bad d7fc7703402184792319f65570ad6a49ffe8cde7
# good: [3793e75b7af7e4908316e7869d8fc61517401865] [libc++][C++03] Cherry-pick #129348 (#162821)
git bisect good 3793e75b7af7e4908316e7869d8fc61517401865
# bad: [6712e20c5261376a6b0015fb3c8d15124757d47d] Add support for flag output operand "=@cc" for SystemZ. (#125970)
git bisect bad 6712e20c5261376a6b0015fb3c8d15124757d47d
# first bad commit: [6712e20c5261376a6b0015fb3c8d15124757d47d] Add support for flag output operand "=@cc" for SystemZ. (#125970)
$ make -skj"$(nproc)" ARCH=s390 LLVM=1 clean allmodconfig drivers/gpu/drm/nouveau/nouveau_fence.o

# Machine code for function nouveau_fence_context_kill: NoPHIs, TracksLiveness, TiedOpsRewritten
Function Live Ins: $r2d in %14, $r3d in %15, $cc, $cc

bb.0.entry:
  successors: %bb.1(0x50000000), %bb.2(0x30000000); %bb.1(62.50%), %bb.2(37.50%)
  liveins: $r2d, $r3d, $cc, $cc
  %15:gr64bit = COPY $r3d
  %14:addr64bit = COPY $r2d
  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $r2d = COPY %14:addr64bit
  CallBRASL @_raw_spin_lock_irqsave, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
  %17:gr64bit = COPY $r2d
  %1:addr64bit = LA %14:addr64bit, 80, $noreg
  %18:gr64bit = SRLG %1:addr64bit, $noreg, 3
  %19:gr64bit = LLIHH 28
  %20:addr64bit = AGRK %18:gr64bit, %19:gr64bit, implicit-def dead $cc
  CLI %20:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.3)
  BRC 14, 8, %bb.2, implicit $cc
  J %bb.1

bb.1 (%ir-block.6):
; predecessors: %bb.0
  successors: %bb.2(0x80000000); %bb.2(100.00%)

  $r2d = COPY %1:addr64bit
  nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.2 (%ir-block.8):
; predecessors: %bb.0, %bb.1
  successors: %bb.3(0x30000000), %bb.4(0x50000000); %bb.3(37.50%), %bb.4(62.50%)

  %2:gr64bit = LG %1:addr64bit, 0, $noreg :: (load (s64) from %ir.pending)
  CGR %2:gr64bit, %1:addr64bit, implicit-def $cc
  BRC 14, 6, %bb.4, implicit $cc
  J %bb.3

bb.3.entry.for.end_crit_edge:
; predecessors: %bb.2
  successors: %bb.36(0x80000000); %bb.36(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.36

bb.4.for.body.lr.ph:
; predecessors: %bb.2
  successors: %bb.5(0x80000000); %bb.5(100.00%)

  %16:gr32bit = COPY %15.subreg_l32:gr64bit
  %21:gr64bit = LLGFR %16:gr32bit
  %22:gr64bit = LGHI 0
  $r2d = COPY %22:gr64bit
  $r3d = COPY %21:gr64bit
  CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  %23:gr64bit = LLILF 4294963201
  $r2d = COPY %23:gr64bit
  $r3d = COPY %21:gr64bit
  CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  %3:gr64bit = LA %14:addr64bit, 272, $noreg
  %25:gr64bit = LLIHH 28
  %36:gr64bit = LGHI 4
  %37:gr64bit = LGHI 3
  %41:gr64bit = LGHI 40
  %76:addr64bit = COPY %2:gr64bit

bb.5.for.body:
; predecessors: %bb.4, %bb.34
  successors: %bb.6(0x50000000), %bb.7(0x30000000); %bb.6(62.50%), %bb.7(37.50%)

  %4:addr64bit = COPY %76:addr64bit
  %24:gr64bit = SRLG %4:addr64bit, $noreg, 3
  %26:addr64bit = AGRK %24:gr64bit, %25:gr64bit, implicit-def dead $cc
  CLI %26:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.13)
  BRC 14, 8, %bb.7, implicit $cc
  J %bb.6

bb.6 (%ir-block.16):
; predecessors: %bb.5
  successors: %bb.7(0x80000000); %bb.7(100.00%)

  $r2d = COPY %4:addr64bit
  nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.7 (%ir-block.18):
; predecessors: %bb.5, %bb.6
  successors: %bb.8(0x30000000), %bb.9(0x50000000); %bb.8(37.50%), %bb.9(62.50%)

  %5:gr64bit = LAY %4:addr64bit, -64, $noreg
  %6:gr64bit = LG %4:addr64bit, 0, $noreg :: (load (s64) from %ir..pn.in55)
  CHIMux %16:gr32bit, 0, implicit-def $cc
  BRC 14, 6, %bb.9, implicit $cc
  J %bb.8

bb.8.for.body.if.end_crit_edge:
; predecessors: %bb.7
  successors: %bb.30(0x80000000); %bb.30(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.30

bb.9.cond.false.i:
; predecessors: %bb.7
  successors: %bb.10(0x40000000), %bb.11(0x40000000); %bb.10(50.00%), %bb.11(50.00%)

  %28:gr64bit = AGHIK %4:addr64bit, -9, implicit-def $cc
  %29:addr64bit = COPY %28:gr64bit
  INLINEASM &"\09tm\09$1,$2" [sideeffect] [mayload] [maystore] [attdialect], $0:[regdef:GR32Bit], def dead %27:gr32bit, $1:[mem:Q], %29:addr64bit, 0, $noreg, $2:[imm], 2, !4
  %74:gr32bit = IPM implicit $cc
  %30:gr32bit = COPY %74:gr32bit
  %31:gr32bit = IPM implicit $cc
  undef %32.subreg_l32:gr64bit = COPY %31:gr32bit
  %34:gr64bit = RISBG undef %34:gr64bit(tied-def 0), %32:gr64bit, 60, 191, 36, implicit-def dead $cc
  $r2d = COPY %36:gr64bit
  $r3d = COPY %34:gr64bit
  CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $r2d = COPY %37:gr64bit
  $r3d = COPY %34:gr64bit
  CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $cc = COPY %30:gr32bit
  BRC 15, 14, %bb.11, implicit $cc
  J %bb.10

bb.10.cond.false.i.if.end_crit_edge:
; predecessors: %bb.9
  successors: %bb.30(0x80000000); %bb.30(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.30

bb.11.if.end.i:
; predecessors: %bb.9
  successors: %bb.12(0x50000000), %bb.13(0x30000000); %bb.12(62.50%), %bb.13(37.50%)

  %7:addr64bit = AGHIK %4:addr64bit, -56, implicit-def dead $cc
  %38:gr64bit = SRLG %7:addr64bit, $noreg, 3
  %40:addr64bit = AGRK %38:gr64bit, %25:gr64bit, implicit-def dead $cc
  CLI %40:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.24)
  BRC 14, 8, %bb.13, implicit $cc
  J %bb.12

bb.12 (%ir-block.27):
; predecessors: %bb.11
  successors: %bb.13(0x80000000); %bb.13(100.00%)

  $r2d = COPY %7:addr64bit
  nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.13 (%ir-block.29):
; predecessors: %bb.11, %bb.12
  successors: %bb.14(0x50000000), %bb.15(0x30000000); %bb.14(62.50%), %bb.15(37.50%)

  %8:addr64bit = COPY %41:gr64bit
  %8:addr64bit = AG %8:addr64bit(tied-def 0), %7:addr64bit, 0, $noreg, implicit-def dead $cc :: (load (s64) from %ir.ops.i)
  %42:gr64bit = SRLG %8:addr64bit, $noreg, 3
  %44:addr64bit = AGRK %42:gr64bit, %25:gr64bit, implicit-def dead $cc
  CLI %44:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.34)
  BRC 14, 8, %bb.15, implicit $cc
  J %bb.14

bb.14 (%ir-block.37):
; predecessors: %bb.13
  successors: %bb.15(0x80000000); %bb.15(100.00%)

  $r2d = COPY %8:addr64bit
  nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.15 (%ir-block.39):
; predecessors: %bb.13, %bb.14
  successors: %bb.16(0x30000000), %bb.17(0x50000000); %bb.16(37.50%), %bb.17(62.50%)

  %9:addr64bit = LG %8:addr64bit, 0, $noreg :: (load (s64) from %ir.signaled.i)
  CGHI %9:addr64bit, 0, implicit-def $cc
  BRC 14, 6, %bb.17, implicit $cc
  J %bb.16

bb.16.if.end.i.cond.false.i45_crit_edge:
; predecessors: %bb.15
  successors: %bb.20(0x80000000); %bb.20(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.20

bb.17.land.lhs.true11.i:
; predecessors: %bb.15
  successors: %bb.19(0x40000000), %bb.18(0x40000000); %bb.19(50.00%), %bb.18(50.00%)

  $r2d = COPY %5:gr64bit
  CallBASR %9:addr64bit, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
  %45:gr64bit = COPY $r2d
  %46:grx32bit = COPY %45.subreg_l32:gr64bit
  CHIMux %46:grx32bit, 0, implicit-def $cc
  BRC 14, 6, %bb.19, implicit $cc
  J %bb.18

bb.18.land.lhs.true11.i.cond.false.i45_crit_edge:
; predecessors: %bb.17
  successors: %bb.20(0x80000000); %bb.20(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.20

bb.19.if.then16.i:
; predecessors: %bb.17
  successors: %bb.30(0x80000000); %bb.30(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $r2d = COPY %5:gr64bit
  CallBRASL @dma_fence_signal_locked, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def dead $r2d
  J %bb.30

bb.20.cond.false.i45:
; predecessors: %bb.18, %bb.16
  successors: %bb.22(0x00106035), %bb.21(0x7fef9fcb); %bb.22(0.05%), %bb.21(99.95%)

  %50:addr64bit = COPY %28:gr64bit
  INLINEASM &"\09tm\09$1,$2" [sideeffect] [mayload] [maystore] [attdialect], $0:[regdef:GR32Bit], def dead %48:gr32bit, $1:[mem:Q], %50:addr64bit, 0, $noreg, $2:[imm], 2, !4
  %75:gr32bit = IPM implicit $cc
  %51:gr32bit = COPY %75:gr32bit
  %52:gr32bit = IPM implicit $cc
  undef %53.subreg_l32:gr64bit = COPY %52:gr32bit
  %55:gr64bit = RISBG undef %55:gr64bit(tied-def 0), %53:gr64bit, 60, 191, 36, implicit-def dead $cc
  $r2d = COPY %36:gr64bit
  $r3d = COPY %55:gr64bit
  CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $r2d = COPY %37:gr64bit
  $r3d = COPY %55:gr64bit
  CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $cc = COPY %51:gr32bit
  BRC 15, 1, %bb.22, implicit killed $cc
  J %bb.21

bb.21.cond.false.i45.if.else59.i_crit_edge:
; predecessors: %bb.20
  successors: %bb.23(0x80000000); %bb.23(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.23

bb.22.do.body26.i:
; predecessors: %bb.20
  successors: %bb.23(0x80000000); %bb.23(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  INLINEASM &"0:\09mc\090,0\0A.section .rodata.str,\22aMS\22,@progbits,1\0A1:\09.asciz\09\22include/linux/dma-fence.h\22\0A.previous\0A.section __bug_table,\22aw\22\0A2:\09.long\090b-.\0A\09.long\091b-.\0A\09.short\09$0,$1\0A\09.org\092b+$2\0A.previous\0A" [sideeffect] [mayload] [attdialect], $0:[imm], 585, $1:[imm], 2305, $2:[imm], 12, !6

bb.23.if.else59.i:
; predecessors: %bb.21, %bb.22
  successors: %bb.25(0x00106035), %bb.24(0x7fef9fcb); %bb.25(0.05%), %bb.24(99.95%)

  CLFIMux %16:gr32bit, 4294963201, implicit-def $cc
  BRC 14, 4, %bb.25, implicit killed $cc
  J %bb.24

bb.24.if.else59.i.dma_fence_set_error.exit_crit_edge:
; predecessors: %bb.23
  successors: %bb.26(0x80000000); %bb.26(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.26

bb.25.do.body70.i:
; predecessors: %bb.23
  successors: %bb.26(0x80000000); %bb.26(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  INLINEASM &"0:\09mc\090,0\0A.section .rodata.str,\22aMS\22,@progbits,1\0A1:\09.asciz\09\22include/linux/dma-fence.h\22\0A.previous\0A.section __bug_table,\22aw\22\0A2:\09.long\090b-.\0A\09.long\091b-.\0A\09.short\09$0,$1\0A\09.org\092b+$2\0A.previous\0A" [sideeffect] [mayload] [attdialect], $0:[imm], 586, $1:[imm], 2305, $2:[imm], 12, !7

bb.26.dma_fence_set_error.exit:
; predecessors: %bb.24, %bb.25
  successors: %bb.27(0x00000800), %bb.29(0x7ffff800); %bb.27(0.00%), %bb.29(100.00%)

  %10:addr64bit = AGHIK %4:addr64bit, -4, implicit-def dead $cc
  %59:gr64bit = SRLG %10:addr64bit, $noreg, 3
  %61:addr64bit = AGRK %59:gr64bit, %25:gr64bit, implicit-def dead $cc
  %11:gr32bit = LBMux %61:addr64bit, 0, $noreg :: (load (s8) from %ir.46)
  CHIMux %11:gr32bit, 0, implicit-def $cc
  BRC 14, 8, %bb.29, implicit killed $cc
  J %bb.27

bb.27 (%ir-block.49):
; predecessors: %bb.26
  successors: %bb.28(0x40000000), %bb.29(0x40000000); %bb.28(50.00%), %bb.29(50.00%)

  %62:grx32bit = COPY %10.subreg_l32:addr64bit
  %63:grx32bit = RISBMux $noreg(tied-def 0), %62:grx32bit, 29, 159, 0
  %64:gr32bit = AHIMuxK %63:grx32bit, 3, implicit-def dead $cc
  CR %64:gr32bit, %11:gr32bit, implicit-def $cc
  BRC 14, 4, %bb.29, implicit killed $cc
  J %bb.28

bb.28 (%ir-block.55):
; predecessors: %bb.27
  successors: %bb.29(0x80000000); %bb.29(100.00%)

  $r2d = COPY %10:addr64bit
  nomerge CallBRASL @__asan_report_store4_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.29 (%ir-block.57):
; predecessors: %bb.26, %bb.27, %bb.28
  successors: %bb.30(0x80000000); %bb.30(100.00%)

  STMux %16:gr32bit, %10:addr64bit, 0, $noreg :: (store (s32) into %ir.error85.i)

bb.30.if.end:
; predecessors: %bb.19, %bb.29, %bb.10, %bb.8
  successors: %bb.32(0x40000000), %bb.31(0x40000000); %bb.32(50.00%), %bb.31(50.00%)

  $r2d = COPY %5:gr64bit
  CallBRASL @nouveau_fence_signal, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
  %65:gr64bit = COPY $r2d
  %66:grx32bit = COPY %65.subreg_l32:gr64bit
  CHIMux %66:grx32bit, 0, implicit-def $cc
  BRC 14, 6, %bb.32, implicit killed $cc
  J %bb.31

bb.31.if.end.for.inc_crit_edge:
; predecessors: %bb.30
  successors: %bb.33(0x80000000); %bb.33(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  J %bb.33

bb.32.if.then18:
; predecessors: %bb.30
  successors: %bb.33(0x80000000); %bb.33(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  $r2d = COPY %3:gr64bit
  CallBRASL @nvif_event_block, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def dead $r2d

bb.33.for.inc:
; predecessors: %bb.31, %bb.32
  successors: %bb.35(0x04000000), %bb.34(0x7c000000); %bb.35(3.12%), %bb.34(96.88%)

  CGR %6:gr64bit, %1:addr64bit, implicit-def $cc
  BRC 14, 8, %bb.35, implicit killed $cc
  J %bb.34

bb.34.for.inc.for.body_crit_edge:
; predecessors: %bb.33
  successors: %bb.5(0x80000000); %bb.5(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
  %76:addr64bit = COPY %6:gr64bit
  J %bb.5

bb.35.for.inc.for.end_crit_edge:
; predecessors: %bb.33
  successors: %bb.36(0x80000000); %bb.36(100.00%)

  nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.36.for.end:
; predecessors: %bb.35, %bb.3
  successors: %bb.37(0x00000800), %bb.39(0x7ffff800); %bb.37(0.00%), %bb.39(100.00%)

  %0:gr64bit = COPY %17:gr64bit
  %12:gr64bit = LA %14:addr64bit, 344, $noreg
  %68:gr64bit = SRLG %12:gr64bit, $noreg, 3
  %69:gr64bit = LLIHH 28
  %70:addr64bit = AGRK %68:gr64bit, %69:gr64bit, implicit-def dead $cc
  %13:gr32bit = LBMux %70:addr64bit, 0, $noreg :: (load (s8) from %ir.61)
  CHIMux %13:gr32bit, 0, implicit-def $cc
  BRC 14, 8, %bb.39, implicit killed $cc
  J %bb.37

bb.37 (%ir-block.64):
; predecessors: %bb.36
  successors: %bb.38(0x40000000), %bb.39(0x40000000); %bb.38(50.00%), %bb.39(50.00%)

  %71:grx32bit = COPY %12.subreg_l32:gr64bit
  %72:grx32bit = RISBMux $noreg(tied-def 0), %71:grx32bit, 29, 159, 0
  %73:gr32bit = AHIMuxK %72:grx32bit, 3, implicit-def dead $cc
  CR %73:gr32bit, %13:gr32bit, implicit-def $cc
  BRC 14, 4, %bb.39, implicit killed $cc
  J %bb.38

bb.38 (%ir-block.70):
; predecessors: %bb.37
  successors: %bb.39(0x80000000); %bb.39(100.00%)

  $r2d = COPY %12:gr64bit
  nomerge CallBRASL @__asan_report_store4_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.39 (%ir-block.72):
; predecessors: %bb.36, %bb.37, %bb.38

  MVHI %14:addr64bit, 344, 1 :: (store (s32) into %ir.sunkaddr, align 8)
  $r2d = COPY %14:addr64bit
  $r3d = COPY %0:gr64bit
  CallJG @_raw_spin_unlock_irqrestore, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit $r2d, implicit $r3d

# End machine code for function nouveau_fence_context_kill.

*** Bad machine code: Using an undefined physical register ***
- function:    nouveau_fence_context_kill
- basic block: %bb.20 cond.false.i45 (0x5559e7064b80)
- instruction: %75:gr32bit = IPM implicit $cc
- operand 1:   implicit $cc

*** Bad machine code: Using an undefined physical register ***
- function:    nouveau_fence_context_kill
- basic block: %bb.20 cond.false.i45 (0x5559e7064b80)
- instruction: %52:gr32bit = IPM implicit $cc
- operand 1:   implicit $cc
fatal error: error in backend: Found 2 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: clang ... -o drivers/gpu/drm/nouveau/nouveau_fence.o drivers/gpu/drm/nouveau/nouveau_fence.c
1.	<eof> parser at end of file
2.	Code generation
3.	Running pass 'Function Pass Manager' on module 'drivers/gpu/drm/nouveau/nouveau_fence.c'.
4.	Running pass 'Live Interval Analysis' on function '@nouveau_fence_context_kill'
 #0 0x00005559dd792528 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (clang+0x36fa528)
 #1 0x00005559dd78fca5 llvm::sys::RunSignalHandlers() (clang+0x36f7ca5)
 #2 0x00005559dd711e47 (anonymous namespace)::CrashRecoveryContextImpl::HandleCrash(int, unsigned long) CrashRecoveryContext.cpp:0:0
 #3 0x00005559dd711ddf llvm::CrashRecoveryContext::HandleExit(int) (clang+0x3679ddf)
 #4 0x00005559dd78c767 llvm::sys::Process::Exit(int, bool) (clang+0x36f4767)
 #5 0x00005559dc3cb136 (clang+0x2333136)
 #6 0x00005559dd718206 llvm::report_fatal_error(llvm::Twine const&, bool) (clang+0x3680206)
 #7 0x00005559dcdaa41e (clang+0x2d1241e)
 #8 0x00005559dcdaaa86 llvm::MachineFunction::verify(llvm::Pass*, char const*, llvm::raw_ostream*, bool) const (clang+0x2d12a86)
 #9 0x00005559dcc43cc2 llvm::LiveRangeCalc::findReachingDefs(llvm::LiveRange&, llvm::MachineBasicBlock&, llvm::SlotIndex, llvm::Register, llvm::ArrayRef<llvm::SlotIndex>) (clang+0x2babcc2)
#10 0x00005559dcc42f5b llvm::LiveRangeCalc::extend(llvm::LiveRange&, llvm::SlotIndex, llvm::Register, llvm::ArrayRef<llvm::SlotIndex>) (clang+0x2baaf5b)
#11 0x00005559dcc46d19 llvm::LiveIntervalCalc::extendToUses(llvm::LiveRange&, llvm::Register, llvm::LaneBitmask, llvm::LiveInterval*) (clang+0x2baed19)
#12 0x00005559dcc2ee43 llvm::LiveIntervals::computeRegUnitRange(llvm::LiveRange&, unsigned int) (clang+0x2b96e43)
#13 0x00005559dcc2e1af llvm::LiveIntervals::computeLiveInRegUnits() (clang+0x2b961af)
#14 0x00005559dcc2caff llvm::LiveIntervals::analyze(llvm::MachineFunction&) (clang+0x2b94aff)
#15 0x00005559dcc2c8ac llvm::LiveIntervalsWrapperPass::runOnMachineFunction(llvm::MachineFunction&) (clang+0x2b948ac)
#16 0x00005559dccdff23 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (clang+0x2c47f23)
#17 0x00005559dd278cb8 llvm::FPPassManager::runOnFunction(llvm::Function&) (clang+0x31e0cb8)
#18 0x00005559dd2803d2 llvm::FPPassManager::runOnModule(llvm::Module&) (clang+0x31e83d2)
#19 0x00005559dd2796a0 llvm::legacy::PassManagerImpl::run(llvm::Module&) (clang+0x31e16a0)
#20 0x00005559ddeed7de clang::emitBackendOutput(clang::CompilerInstance&, clang::CodeGenOptions&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (clang+0x3e557de)
#21 0x00005559ddf03008 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (clang+0x3e6b008)
#22 0x00005559df505819 clang::ParseAST(clang::Sema&, bool, bool) (clang+0x546d819)
#23 0x00005559de3ffb16 clang::FrontendAction::Execute() (clang+0x4367b16)
#24 0x00005559de3687cd clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (clang+0x42d07cd)
#25 0x00005559de4d7775 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (clang+0x443f775)
#26 0x00005559dc3caa77 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (clang+0x2332a77)
#27 0x00005559dc3c6875 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) driver.cpp:0:0
#28 0x00005559dc3c8e4d int llvm::function_ref<int (llvm::SmallVectorImpl<char const*>&)>::callback_fn<clang_main(int, char**, llvm::ToolContext const&)::$_0>(long, llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0
#29 0x00005559de1cda69 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const::$_0>(long) Job.cpp:0:0
#30 0x00005559dd711d7e llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (clang+0x3679d7e)
#31 0x00005559de1cd2a3 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const (clang+0x41352a3)
#32 0x00005559de18eadc clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (clang+0x40f6adc)
#33 0x00005559de18ecf7 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (clang+0x40f6cf7)
#34 0x00005559de1a8578 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (clang+0x4110578)
#35 0x00005559dc3c6130 clang_main(int, char**, llvm::ToolContext const&) (clang+0x232e130)
#36 0x00005559dc3d6247 main (clang+0x233e247)
#37 0x00007f2cc3027635 (/usr/lib/libc.so.6+0x27635)
#38 0x00007f2cc30276e9 __libc_start_main (/usr/lib/libc.so.6+0x276e9)
#39 0x00005559dc3c42e5 _start (clang+0x232c2e5)
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
ClangBuiltLinux clang version 22.0.0git (https://github.com/llvm/llvm-project.git 097f1e7625966673b881df63a241f755317b0bb9)
...

cvise spits out:

int arch_test_bit_cc, nouveau_fence_wait_legacy_fence;
long jiffies, nouveau_fence_wait_legacy_wait;
/*
 * Reduced reproducer (cvise output) for the SystemZ "=@cc" flag output
 * operand crash reported in this thread.
 *
 * The loop body is an inline asm with an empty template whose only operand
 * is a condition-code flag output ("=@cc") written to arch_test_bit_cc.
 * Nothing in the loop modifies the loop condition. Do not simplify or
 * restyle this function: it is kept exactly as reduced so that it continues
 * to trigger the failure.
 */
long nouveau_fence_wait_legacy() {
  long t = jiffies, timeout = nouveau_fence_wait_legacy_wait;
  /* Empty asm: the condition code is read back without the asm setting it. */
  while (nouveau_fence_wait_legacy_fence)
    asm("" : "=@cc"(arch_test_bit_cc));
  return timeout - t;
}
$ clang --target=s390x-linux-gnu -c -o /dev/null nouveau_fence.i

$ clang --target=s390x-linux-gnu -O2 -c -o /dev/null nouveau_fence.i

# Machine code for function nouveau_fence_wait_legacy: NoPHIs, TracksLiveness, TiedOpsRewritten
Function Live Ins: $cc

bb.0.entry:
  successors: %bb.3(0x30000000), %bb.1(0x50000000); %bb.3(37.50%), %bb.1(62.50%)
  liveins: $cc
  %0:addr64bit = LARL @nouveau_fence_wait_legacy_fence
  CHSI %0:addr64bit, 0, 0, implicit-def $cc :: (dereferenceable load (s32) from @nouveau_fence_wait_legacy_fence, !tbaa !4)
  BRC 14, 8, %bb.3, implicit $cc
  J %bb.1

bb.1.while.body.lr.ph:
; predecessors: %bb.0
  successors: %bb.2(0x80000000); %bb.2(100.00%)

  INLINEASM &"" [maystore] [attdialect], $0:[regdef:GR32Bit], def dead %1:gr32bit, !8
  %2:gr32bit = IPM implicit $cc
  %3:gr32bit = COPY %2:gr32bit
  %3:gr32bit = SRL %3:gr32bit(tied-def 0), $noreg, 28
  STRL %3:gr32bit, @arch_test_bit_cc :: (store (s32) into @arch_test_bit_cc, !tbaa !4)

bb.2.while.body:
; predecessors: %bb.1, %bb.2
  successors: %bb.2(0x80000000); %bb.2(100.00%)

  J %bb.2

bb.3.while.end:
; predecessors: %bb.0

  %4:gr64bit = LGRL @nouveau_fence_wait_legacy_wait :: (dereferenceable load (s64) from @nouveau_fence_wait_legacy_wait, !tbaa !9)
  %5:addr64bit = LARL @jiffies
  %6:gr64bit = COPY %4:gr64bit
  %6:gr64bit = nsw SG %6:gr64bit(tied-def 0), %5:addr64bit, 0, $noreg, implicit-def dead $cc :: (dereferenceable load (s64) from @jiffies, !tbaa !9)
  $r2d = COPY %6:gr64bit
  Return implicit killed $r2d

# End machine code for function nouveau_fence_wait_legacy.

*** Bad machine code: Using an undefined physical register ***
- function:    nouveau_fence_wait_legacy
- basic block: %bb.1 while.body.lr.ph (0x55dba4f72718)
- instruction: %2:gr32bit = IPM implicit $cc
- operand 1:   implicit $cc
fatal error: error in backend: Found 1 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.      Program arguments: clang --target=s390x-linux-gnu -O2 -c -o /dev/null nouveau_fence.i
1.      <eof> parser at end of file
2.      Code generation
3.      Running pass 'Function Pass Manager' on module 'nouveau_fence.i'.
4.      Running pass 'Live Interval Analysis' on function '@nouveau_fence_wait_legacy'
 #0 0x000055db9b16c2b8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (clang-22+0x36f42b8)
 #1 0x000055db9b169a35 llvm::sys::RunSignalHandlers() (clang-22+0x36f1a35)
 #2 0x000055db9b0ebb97 (anonymous namespace)::CrashRecoveryContextImpl::HandleCrash(int, unsigned long) CrashRecoveryContext.cpp:0:0
 #3 0x000055db9b0ebb2f llvm::CrashRecoveryContext::HandleExit(int) (clang-22+0x3673b2f)
 #4 0x000055db9b1664f7 llvm::sys::Process::Exit(int, bool) (clang-22+0x36ee4f7)
 #5 0x000055db99da40c6 (clang-22+0x232c0c6)
 #6 0x000055db9b0f1f56 llvm::report_fatal_error(llvm::Twine const&, bool) (clang-22+0x3679f56)
 #7 0x000055db9a783e5e (clang-22+0x2d0be5e)
 #8 0x000055db9a7844c6 llvm::MachineFunction::verify(llvm::Pass*, char const*, llvm::raw_ostream*, bool) const (clang-22+0x2d0c4c6)
 #9 0x000055db9a61d5c2 llvm::LiveRangeCalc::findReachingDefs(llvm::LiveRange&, llvm::MachineBasicBlock&, llvm::SlotIndex, llvm::Register, llvm::ArrayRef<llvm::SlotIndex>) (clang-22+0x2ba55c2)
#10 0x000055db9a61c85b llvm::LiveRangeCalc::extend(llvm::LiveRange&, llvm::SlotIndex, llvm::Register, llvm::ArrayRef<llvm::SlotIndex>) (clang-22+0x2ba485b)
#11 0x000055db9a620629 llvm::LiveIntervalCalc::extendToUses(llvm::LiveRange&, llvm::Register, llvm::LaneBitmask, llvm::LiveInterval*) (clang-22+0x2ba8629)
#12 0x000055db9a6087b3 llvm::LiveIntervals::computeRegUnitRange(llvm::LiveRange&, unsigned int) (clang-22+0x2b907b3)
#13 0x000055db9a607b1f llvm::LiveIntervals::computeLiveInRegUnits() (clang-22+0x2b8fb1f)
#14 0x000055db9a60646f llvm::LiveIntervals::analyze(llvm::MachineFunction&) (clang-22+0x2b8e46f)
#15 0x000055db9a60621c llvm::LiveIntervalsWrapperPass::runOnMachineFunction(llvm::MachineFunction&) (clang-22+0x2b8e21c)
#16 0x000055db9a6b9903 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (clang-22+0x2c41903)
#17 0x000055db9ac52a48 llvm::FPPassManager::runOnFunction(llvm::Function&) (clang-22+0x31daa48)
#18 0x000055db9ac5a162 llvm::FPPassManager::runOnModule(llvm::Module&) (clang-22+0x31e2162)
#19 0x000055db9ac53430 llvm::legacy::PassManagerImpl::run(llvm::Module&) (clang-22+0x31db430)
#20 0x000055db9b8c473e clang::emitBackendOutput(clang::CompilerInstance&, clang::CodeGenOptions&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (clang-22+0x3e4c73e)
#21 0x000055db9b8d9f68 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (clang-22+0x3e61f68)
#22 0x000055db9cee0559 clang::ParseAST(clang::Sema&, bool, bool) (clang-22+0x5468559)
#23 0x000055db9bdd6b16 clang::FrontendAction::Execute() (clang-22+0x435eb16)
#24 0x000055db9bd3f77d clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (clang-22+0x42c777d)
#25 0x000055db9beaec05 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (clang-22+0x4436c05)
#26 0x000055db99da3a07 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (clang-22+0x232ba07)
#27 0x000055db99d9f7f5 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) driver.cpp:0:0
#28 0x000055db99da1ddd int llvm::function_ref<int (llvm::SmallVectorImpl<char const*>&)>::callback_fn<clang_main(int, char**, llvm::ToolContext const&)::$_0>(long, llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0
#29 0x000055db9bba4b69 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const::$_0>(long) Job.cpp:0:0
#30 0x000055db9b0ebace llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (clang-22+0x3673ace)
#31 0x000055db9bba43a3 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const (clang-22+0x412c3a3)
#32 0x000055db9bb65b2c clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (clang-22+0x40edb2c)
#33 0x000055db9bb65d47 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (clang-22+0x40edd47)
#34 0x000055db9bb7f5f8 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (clang-22+0x41075f8)
#35 0x000055db99d9f0b0 clang_main(int, char**, llvm::ToolContext const&) (clang-22+0x23270b0)
#36 0x000055db99daf1d7 main (clang-22+0x23371d7)
#37 0x00007f45f4827635 (/usr/lib/libc.so.6+0x27635)
#38 0x00007f45f48276e9 __libc_start_main (/usr/lib/libc.so.6+0x276e9)
#39 0x000055db99d9d265 _start (clang-22+0x2325265)
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
ClangBuiltLinux clang version 22.0.0git (https://github.com/llvm/llvm-project.git 6712e20c5261376a6b0015fb3c8d15124757d47d)
...

nathanchance avatar Oct 15 '25 16:10 nathanchance

Thank you, Nathan, for reporting the issue with the flag output operand for SystemZ, along with a unit test case. I am able to reproduce it and am working on a fix.


From: Nathan Chancellor @.> Sent: Wednesday, October 15, 2025 11:37 AM To: llvm/llvm-project @.> Cc: Anoop Kumar @.>; Author @.> Subject: [EXTERNAL] Re: [llvm/llvm-project] Add support for flag output operand "=@cc" for SystemZ. (PR #125970)

nathanchance left a comment (llvm/llvm-project#125970) I am seeing a crash while building the Linux kernel for ARCH=s390 after this change. # bad: [96da982128bf7b005afa24a8e6e41e5867d30bc4] [sanitizers] COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME

[https://avatars.githubusercontent.com/u/11478138?s=20&v=4] nathanchance left a comment (llvm/llvm-project#125970) <https://github.com/llvm/llvm-project/pull/125970#issuecomment-3407355089>

I am seeing a crash while building the Linux kernel for ARCH=s390 after this change.

bad: [96da982128bf7b005afa24a8e6e41e5867d30bc4] [sanitizers] COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME to build only unit tests (#161455)

good: [e8f721e621d85a2670f13307b1b99528cf5e8708] [clang][docs] Update doc and release note for probe instrumentation (#162606)

git bisect start '96da982128bf7b005afa24a8e6e41e5867d30bc4' 'e8f721e621d85a2670f13307b1b99528cf5e8708'

bad: [da5fb5e964c213d0ec834ad0b560a523a57ce5cc] [ObjCopy][DX] Support for -dump-section flag (#159999)

git bisect bad da5fb5e964c213d0ec834ad0b560a523a57ce5cc

bad: [69e0fd6d8dea666205fca52265f09b3eb5ee2f3d] [X86] Remove PREFETCHI from PTL (#163196)

git bisect bad 69e0fd6d8dea666205fca52265f09b3eb5ee2f3d

good: [782dd178fcb3b146dd16792b54c867095b863ccc] [SPIRV] Do not emit @llvm.compiler.used (#162678)

git bisect good 782dd178fcb3b146dd16792b54c867095b863ccc

good: [4a8dd4998dae8b7d67e416d20a1fa8a9451c64f5] [BOLT][NFC] Fix for a dangling reference UB (#163344)

git bisect good 4a8dd4998dae8b7d67e416d20a1fa8a9451c64f5

bad: [d7fc7703402184792319f65570ad6a49ffe8cde7] [LLVM][DAGCombiner] Improve simplifyDivRem's effectiveness after type legalisation. (#162706)

git bisect bad d7fc7703402184792319f65570ad6a49ffe8cde7

good: [3793e75b7af7e4908316e7869d8fc61517401865] [libc++][C++03] Cherry-pick #129348 (#162821)

git bisect good 3793e75b7af7e4908316e7869d8fc61517401865

bad: [6712e20c5261376a6b0015fb3c8d15124757d47d] Add support for flag output operand "=@cc" for SystemZ. (#125970)

git bisect bad 6712e20c5261376a6b0015fb3c8d15124757d47d

first bad commit: [6712e20c5261376a6b0015fb3c8d15124757d47d] Add support for flag output operand "=@cc" for SystemZ. (#125970)

$ make -skj"$(nproc)" ARCH=s390 LLVM=1 clean allmodconfig drivers/gpu/drm/nouveau/nouveau_fence.o

Machine code for function nouveau_fence_context_kill: NoPHIs, TracksLiveness, TiedOpsRewritten

Function Live Ins: $r2d in %14, $r3d in %15, $cc, $cc

bb.0.entry: successors: %bb.1(0x50000000), %bb.2(0x30000000); %bb.1(62.50%), %bb.2(37.50%) liveins: $r2d, $r3d, $cc, $cc %15:gr64bit = COPY $r3d %14:addr64bit = COPY $r2d nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $r2d = COPY %14:addr64bit CallBRASL @_raw_spin_lock_irqsave, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d %17:gr64bit = COPY $r2d %1:addr64bit = LA %14:addr64bit, 80, $noreg %18:gr64bit = SRLG %1:addr64bit, $noreg, 3 %19:gr64bit = LLIHH 28 %20:addr64bit = AGRK %18:gr64bit, %19:gr64bit, implicit-def dead $cc CLI %20:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.3) BRC 14, 8, %bb.2, implicit $cc J %bb.1

bb.1 (%ir-block.6): ; predecessors: %bb.0 successors: %bb.2(0x80000000); %bb.2(100.00%)

$r2d = COPY %1:addr64bit nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.2 (%ir-block.8): ; predecessors: %bb.0, %bb.1 successors: %bb.3(0x30000000), %bb.4(0x50000000); %bb.3(37.50%), %bb.4(62.50%)

%2:gr64bit = LG %1:addr64bit, 0, $noreg :: (load (s64) from %ir.pending) CGR %2:gr64bit, %1:addr64bit, implicit-def $cc BRC 14, 6, %bb.4, implicit $cc J %bb.3

bb.3.entry.for.end_crit_edge: ; predecessors: %bb.2 successors: %bb.36(0x80000000); %bb.36(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.36

bb.4.for.body.lr.ph: ; predecessors: %bb.2 successors: %bb.5(0x80000000); %bb.5(100.00%)

%16:gr32bit = COPY %15.subreg_l32:gr64bit %21:gr64bit = LLGFR %16:gr32bit %22:gr64bit = LGHI 0 $r2d = COPY %22:gr64bit $r3d = COPY %21:gr64bit CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc %23:gr64bit = LLILF 4294963201 $r2d = COPY %23:gr64bit $r3d = COPY %21:gr64bit CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc %3:gr64bit = LA %14:addr64bit, 272, $noreg %25:gr64bit = LLIHH 28 %36:gr64bit = LGHI 4 %37:gr64bit = LGHI 3 %41:gr64bit = LGHI 40 %76:addr64bit = COPY %2:gr64bit

bb.5.for.body: ; predecessors: %bb.4, %bb.34 successors: %bb.6(0x50000000), %bb.7(0x30000000); %bb.6(62.50%), %bb.7(37.50%)

%4:addr64bit = COPY %76:addr64bit %24:gr64bit = SRLG %4:addr64bit, $noreg, 3 %26:addr64bit = AGRK %24:gr64bit, %25:gr64bit, implicit-def dead $cc CLI %26:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.13) BRC 14, 8, %bb.7, implicit $cc J %bb.6

bb.6 (%ir-block.16): ; predecessors: %bb.5 successors: %bb.7(0x80000000); %bb.7(100.00%)

$r2d = COPY %4:addr64bit nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.7 (%ir-block.18): ; predecessors: %bb.5, %bb.6 successors: %bb.8(0x30000000), %bb.9(0x50000000); %bb.8(37.50%), %bb.9(62.50%)

%5:gr64bit = LAY %4:addr64bit, -64, $noreg %6:gr64bit = LG %4:addr64bit, 0, $noreg :: (load (s64) from %ir..pn.in55) CHIMux %16:gr32bit, 0, implicit-def $cc BRC 14, 6, %bb.9, implicit $cc J %bb.8

bb.8.for.body.if.end_crit_edge: ; predecessors: %bb.7 successors: %bb.30(0x80000000); %bb.30(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.30

bb.9.cond.false.i: ; predecessors: %bb.7 successors: %bb.10(0x40000000), %bb.11(0x40000000); %bb.10(50.00%), %bb.11(50.00%)

%28:gr64bit = AGHIK %4:addr64bit, -9, implicit-def $cc %29:addr64bit = COPY %28:gr64bit INLINEASM &"\09tm\09$1,$2" [sideeffect] [mayload] [maystore] [attdialect], $0:[regdef:GR32Bit], def dead %27:gr32bit, $1:[mem:Q], %29:addr64bit, 0, $noreg, $2:[imm], 2, !4 %74:gr32bit = IPM implicit $cc %30:gr32bit = COPY %74:gr32bit %31:gr32bit = IPM implicit $cc undef %32.subreg_l32:gr64bit = COPY %31:gr32bit %34:gr64bit = RISBG undef %34:gr64bit(tied-def 0), %32:gr64bit, 60, 191, 36, implicit-def dead $cc $r2d = COPY %36:gr64bit $r3d = COPY %34:gr64bit CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $r2d = COPY %37:gr64bit $r3d = COPY %34:gr64bit CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $cc = COPY %30:gr32bit BRC 15, 14, %bb.11, implicit $cc J %bb.10

bb.10.cond.false.i.if.end_crit_edge: ; predecessors: %bb.9 successors: %bb.30(0x80000000); %bb.30(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.30

bb.11.if.end.i: ; predecessors: %bb.9 successors: %bb.12(0x50000000), %bb.13(0x30000000); %bb.12(62.50%), %bb.13(37.50%)

%7:addr64bit = AGHIK %4:addr64bit, -56, implicit-def dead $cc %38:gr64bit = SRLG %7:addr64bit, $noreg, 3 %40:addr64bit = AGRK %38:gr64bit, %25:gr64bit, implicit-def dead $cc CLI %40:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.24) BRC 14, 8, %bb.13, implicit $cc J %bb.12

bb.12 (%ir-block.27): ; predecessors: %bb.11 successors: %bb.13(0x80000000); %bb.13(100.00%)

$r2d = COPY %7:addr64bit nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.13 (%ir-block.29): ; predecessors: %bb.11, %bb.12 successors: %bb.14(0x50000000), %bb.15(0x30000000); %bb.14(62.50%), %bb.15(37.50%)

%8:addr64bit = COPY %41:gr64bit %8:addr64bit = AG %8:addr64bit(tied-def 0), %7:addr64bit, 0, $noreg, implicit-def dead $cc :: (load (s64) from %ir.ops.i) %42:gr64bit = SRLG %8:addr64bit, $noreg, 3 %44:addr64bit = AGRK %42:gr64bit, %25:gr64bit, implicit-def dead $cc CLI %44:addr64bit, 0, 0, implicit-def $cc :: (load (s8) from %ir.34) BRC 14, 8, %bb.15, implicit $cc J %bb.14

bb.14 (%ir-block.37): ; predecessors: %bb.13 successors: %bb.15(0x80000000); %bb.15(100.00%)

$r2d = COPY %8:addr64bit nomerge CallBRASL @__asan_report_load8_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.15 (%ir-block.39): ; predecessors: %bb.13, %bb.14 successors: %bb.16(0x30000000), %bb.17(0x50000000); %bb.16(37.50%), %bb.17(62.50%)

%9:addr64bit = LG %8:addr64bit, 0, $noreg :: (load (s64) from %ir.signaled.i) CGHI %9:addr64bit, 0, implicit-def $cc BRC 14, 6, %bb.17, implicit $cc J %bb.16

bb.16.if.end.i.cond.false.i45_crit_edge: ; predecessors: %bb.15 successors: %bb.20(0x80000000); %bb.20(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.20

bb.17.land.lhs.true11.i: ; predecessors: %bb.15 successors: %bb.19(0x40000000), %bb.18(0x40000000); %bb.19(50.00%), %bb.18(50.00%)

$r2d = COPY %5:gr64bit CallBASR %9:addr64bit, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d %45:gr64bit = COPY $r2d %46:grx32bit = COPY %45.subreg_l32:gr64bit CHIMux %46:grx32bit, 0, implicit-def $cc BRC 14, 6, %bb.19, implicit $cc J %bb.18

bb.18.land.lhs.true11.i.cond.false.i45_crit_edge: ; predecessors: %bb.17 successors: %bb.20(0x80000000); %bb.20(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.20

bb.19.if.then16.i: ; predecessors: %bb.17 successors: %bb.30(0x80000000); %bb.30(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $r2d = COPY %5:gr64bit CallBRASL @dma_fence_signal_locked, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def dead $r2d J %bb.30

bb.20.cond.false.i45: ; predecessors: %bb.18, %bb.16 successors: %bb.22(0x00106035), %bb.21(0x7fef9fcb); %bb.22(0.05%), %bb.21(99.95%)

%50:addr64bit = COPY %28:gr64bit INLINEASM &"\09tm\09$1,$2" [sideeffect] [mayload] [maystore] [attdialect], $0:[regdef:GR32Bit], def dead %48:gr32bit, $1:[mem:Q], %50:addr64bit, 0, $noreg, $2:[imm], 2, !4 %75:gr32bit = IPM implicit $cc %51:gr32bit = COPY %75:gr32bit %52:gr32bit = IPM implicit $cc undef %53.subreg_l32:gr64bit = COPY %52:gr32bit %55:gr64bit = RISBG undef %55:gr64bit(tied-def 0), %53:gr64bit, 60, 191, 36, implicit-def dead $cc $r2d = COPY %36:gr64bit $r3d = COPY %55:gr64bit CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $r2d = COPY %37:gr64bit $r3d = COPY %55:gr64bit CallBRASL @__sanitizer_cov_trace_const_cmp4, $r2d, $r3d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $cc = COPY %51:gr32bit BRC 15, 1, %bb.22, implicit killed $cc J %bb.21

bb.21.cond.false.i45.if.else59.i_crit_edge: ; predecessors: %bb.20 successors: %bb.23(0x80000000); %bb.23(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.23

bb.22.do.body26.i: ; predecessors: %bb.20 successors: %bb.23(0x80000000); %bb.23(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc INLINEASM &"0:\09mc\090,0\0A.section .@.***,1\0A1:\09.asciz\09\22include/linux/dma-fence.h\22\0A.previous\0A.section __bug_table,\22aw\22\0A2:\09.long\090b-.\0A\09.long\091b-.\0A\09.short\09$0,$1\0A\09.org\092b+$2\0A.previous\0A" [sideeffect] [mayload] [attdialect], $0:[imm], 585, $1:[imm], 2305, $2:[imm], 12, !6

bb.23.if.else59.i: ; predecessors: %bb.21, %bb.22 successors: %bb.25(0x00106035), %bb.24(0x7fef9fcb); %bb.25(0.05%), %bb.24(99.95%)

CLFIMux %16:gr32bit, 4294963201, implicit-def $cc BRC 14, 4, %bb.25, implicit killed $cc J %bb.24

bb.24.if.else59.i.dma_fence_set_error.exit_crit_edge: ; predecessors: %bb.23 successors: %bb.26(0x80000000); %bb.26(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.26

bb.25.do.body70.i: ; predecessors: %bb.23 successors: %bb.26(0x80000000); %bb.26(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc INLINEASM &"0:\09mc\090,0\0A.section .@.***,1\0A1:\09.asciz\09\22include/linux/dma-fence.h\22\0A.previous\0A.section __bug_table,\22aw\22\0A2:\09.long\090b-.\0A\09.long\091b-.\0A\09.short\09$0,$1\0A\09.org\092b+$2\0A.previous\0A" [sideeffect] [mayload] [attdialect], $0:[imm], 586, $1:[imm], 2305, $2:[imm], 12, !7

bb.26.dma_fence_set_error.exit: ; predecessors: %bb.24, %bb.25 successors: %bb.27(0x00000800), %bb.29(0x7ffff800); %bb.27(0.00%), %bb.29(100.00%)

%10:addr64bit = AGHIK %4:addr64bit, -4, implicit-def dead $cc %59:gr64bit = SRLG %10:addr64bit, $noreg, 3 %61:addr64bit = AGRK %59:gr64bit, %25:gr64bit, implicit-def dead $cc %11:gr32bit = LBMux %61:addr64bit, 0, $noreg :: (load (s8) from %ir.46) CHIMux %11:gr32bit, 0, implicit-def $cc BRC 14, 8, %bb.29, implicit killed $cc J %bb.27

bb.27 (%ir-block.49): ; predecessors: %bb.26 successors: %bb.28(0x40000000), %bb.29(0x40000000); %bb.28(50.00%), %bb.29(50.00%)

%62:grx32bit = COPY %10.subreg_l32:addr64bit %63:grx32bit = RISBMux $noreg(tied-def 0), %62:grx32bit, 29, 159, 0 %64:gr32bit = AHIMuxK %63:grx32bit, 3, implicit-def dead $cc CR %64:gr32bit, %11:gr32bit, implicit-def $cc BRC 14, 4, %bb.29, implicit killed $cc J %bb.28

bb.28 (%ir-block.55): ; predecessors: %bb.27 successors: %bb.29(0x80000000); %bb.29(100.00%)

$r2d = COPY %10:addr64bit nomerge CallBRASL @__asan_report_store4_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.29 (%ir-block.57): ; predecessors: %bb.26, %bb.27, %bb.28 successors: %bb.30(0x80000000); %bb.30(100.00%)

STMux %16:gr32bit, %10:addr64bit, 0, $noreg :: (store (s32) into %ir.error85.i)

bb.30.if.end: ; predecessors: %bb.19, %bb.29, %bb.10, %bb.8 successors: %bb.32(0x40000000), %bb.31(0x40000000); %bb.32(50.00%), %bb.31(50.00%)

$r2d = COPY %5:gr64bit CallBRASL @nouveau_fence_signal, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d %65:gr64bit = COPY $r2d %66:grx32bit = COPY %65.subreg_l32:gr64bit CHIMux %66:grx32bit, 0, implicit-def $cc BRC 14, 6, %bb.32, implicit killed $cc J %bb.31

bb.31.if.end.for.inc_crit_edge: ; predecessors: %bb.30 successors: %bb.33(0x80000000); %bb.33(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc J %bb.33

bb.32.if.then18: ; predecessors: %bb.30 successors: %bb.33(0x80000000); %bb.33(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc $r2d = COPY %3:gr64bit CallBRASL @nvif_event_block, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def dead $r2d

bb.33.for.inc: ; predecessors: %bb.31, %bb.32 successors: %bb.35(0x04000000), %bb.34(0x7c000000); %bb.35(3.12%), %bb.34(96.88%)

CGR %6:gr64bit, %1:addr64bit, implicit-def $cc BRC 14, 8, %bb.35, implicit killed $cc J %bb.34

bb.34.for.inc.for.body_crit_edge: ; predecessors: %bb.33 successors: %bb.5(0x80000000); %bb.5(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc %76:addr64bit = COPY %6:gr64bit J %bb.5

bb.35.for.inc.for.end_crit_edge: ; predecessors: %bb.33 successors: %bb.36(0x80000000); %bb.36(100.00%)

nomerge CallBRASL @__sanitizer_cov_trace_pc, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.36.for.end: ; predecessors: %bb.35, %bb.3 successors: %bb.37(0x00000800), %bb.39(0x7ffff800); %bb.37(0.00%), %bb.39(100.00%)

%0:gr64bit = COPY %17:gr64bit %12:gr64bit = LA %14:addr64bit, 344, $noreg %68:gr64bit = SRLG %12:gr64bit, $noreg, 3 %69:gr64bit = LLIHH 28 %70:addr64bit = AGRK %68:gr64bit, %69:gr64bit, implicit-def dead $cc %13:gr32bit = LBMux %70:addr64bit, 0, $noreg :: (load (s8) from %ir.61) CHIMux %13:gr32bit, 0, implicit-def $cc BRC 14, 8, %bb.39, implicit killed $cc J %bb.37

bb.37 (%ir-block.64): ; predecessors: %bb.36 successors: %bb.38(0x40000000), %bb.39(0x40000000); %bb.38(50.00%), %bb.39(50.00%)

%71:grx32bit = COPY %12.subreg_l32:gr64bit %72:grx32bit = RISBMux $noreg(tied-def 0), %71:grx32bit, 29, 159, 0 %73:gr32bit = AHIMuxK %72:grx32bit, 3, implicit-def dead $cc CR %73:gr32bit, %13:gr32bit, implicit-def $cc BRC 14, 4, %bb.39, implicit killed $cc J %bb.38

bb.38 (%ir-block.70): ; predecessors: %bb.37 successors: %bb.39(0x80000000); %bb.39(100.00%)

$r2d = COPY %12:gr64bit nomerge CallBRASL @__asan_report_store4_noabort, $r2d, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc

bb.39 (%ir-block.72): ; predecessors: %bb.36, %bb.37, %bb.38

MVHI %14:addr64bit, 344, 1 :: (store (s32) into %ir.sunkaddr, align 8) $r2d = COPY %14:addr64bit $r3d = COPY %0:gr64bit CallJG @_raw_spin_unlock_irqrestore, <regmask $f8d $f9d $f10d $f11d $f12d $f13d $f14d $f15d $f8h $f9h $f10h $f11h $f12h $f13h $f14h $f15h $f8q $f9q $f12q $f13q $f8s $f9s $f10s $f11s $f12s $f13s $f14s $f15s $r6d $r7d $r8d $r9d $r10d and 30 more...>, implicit $r2d, implicit $r3d

End machine code for function nouveau_fence_context_kill.

*** Bad machine code: Using an undefined physical register ***

  • function: nouveau_fence_context_kill
  • basic block: %bb.20 cond.false.i45 (0x5559e7064b80)
  • instruction: %75:gr32bit = IPM implicit $cc
  • operand 1: implicit $cc

*** Bad machine code: Using an undefined physical register ***

  • function: nouveau_fence_context_kill
  • basic block: %bb.20 cond.false.i45 (0x5559e7064b80)
  • instruction: %52:gr32bit = IPM implicit $cc
  • operand 1: implicit $cc

fatal error: error in backend: Found 2 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
  1.  Program arguments: clang ... -o drivers/gpu/drm/nouveau/nouveau_fence.o drivers/gpu/drm/nouveau/nouveau_fence.c
    
  2.  <eof> parser at end of file
    
  3.  Code generation
    
  4.  Running pass 'Function Pass Manager' on module 'drivers/gpu/drm/nouveau/nouveau_fence.c'.
    
  5.  Running pass 'Live Interval Analysis' on function '@nouveau_fence_context_kill'
    

#0 0x00005559dd792528 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (clang+0x36fa528)
#1 0x00005559dd78fca5 llvm::sys::RunSignalHandlers() (clang+0x36f7ca5)
#2 0x00005559dd711e47 (anonymous namespace)::CrashRecoveryContextImpl::HandleCrash(int, unsigned long) CrashRecoveryContext.cpp:0:0
#3 0x00005559dd711ddf llvm::CrashRecoveryContext::HandleExit(int) (clang+0x3679ddf)
#4 0x00005559dd78c767 llvm::sys::Process::Exit(int, bool) (clang+0x36f4767)
#5 0x00005559dc3cb136 (clang+0x2333136)
#6 0x00005559dd718206 llvm::report_fatal_error(llvm::Twine const&, bool) (clang+0x3680206)
#7 0x00005559dcdaa41e (clang+0x2d1241e)
#8 0x00005559dcdaaa86 llvm::MachineFunction::verify(llvm::Pass*, char const*, llvm::raw_ostream*, bool) const (clang+0x2d12a86)
#9 0x00005559dcc43cc2 llvm::LiveRangeCalc::findReachingDefs(llvm::LiveRange&, llvm::MachineBasicBlock&, llvm::SlotIndex, llvm::Register, llvm::ArrayRef<llvm::SlotIndex>) (clang+0x2babcc2)
#10 0x00005559dcc42f5b llvm::LiveRangeCalc::extend(llvm::LiveRange&, llvm::SlotIndex, llvm::Register, llvm::ArrayRef<llvm::SlotIndex>) (clang+0x2baaf5b)
#11 0x00005559dcc46d19 llvm::LiveIntervalCalc::extendToUses(llvm::LiveRange&, llvm::Register, llvm::LaneBitmask, llvm::LiveInterval*) (clang+0x2baed19)
#12 0x00005559dcc2ee43 llvm::LiveIntervals::computeRegUnitRange(llvm::LiveRange&, unsigned int) (clang+0x2b96e43)
#13 0x00005559dcc2e1af llvm::LiveIntervals::computeLiveInRegUnits() (clang+0x2b961af)
#14 0x00005559dcc2caff llvm::LiveIntervals::analyze(llvm::MachineFunction&) (clang+0x2b94aff)
#15 0x00005559dcc2c8ac llvm::LiveIntervalsWrapperPass::runOnMachineFunction(llvm::MachineFunction&) (clang+0x2b948ac)
#16 0x00005559dccdff23 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (clang+0x2c47f23)
#17 0x00005559dd278cb8 llvm::FPPassManager::runOnFunction(llvm::Function&) (clang+0x31e0cb8)
#18 0x00005559dd2803d2 llvm::FPPassManager::runOnModule(llvm::Module&) (clang+0x31e83d2)
#19 0x00005559dd2796a0 llvm::legacy::PassManagerImpl::run(llvm::Module&) (clang+0x31e16a0)
#20 0x00005559ddeed7de clang::emitBackendOutput(clang::CompilerInstance&, clang::CodeGenOptions&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (clang+0x3e557de)
#21 0x00005559ddf03008 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (clang+0x3e6b008)
#22 0x00005559df505819 clang::ParseAST(clang::Sema&, bool, bool) (clang+0x546d819)
#23 0x00005559de3ffb16 clang::FrontendAction::Execute() (clang+0x4367b16)
#24 0x00005559de3687cd clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (clang+0x42d07cd)
#25 0x00005559de4d7775 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (clang+0x443f775)
#26 0x00005559dc3caa77 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (clang+0x2332a77)
#27 0x00005559dc3c6875 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) driver.cpp:0:0
#28 0x00005559dc3c8e4d int llvm::function_ref<int (llvm::SmallVectorImpl<char const*>&)>::callback_fn<clang_main(int, char**, llvm::ToolContext const&)::$_0>(long, llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0
#29 0x00005559de1cda69 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const::$_0>(long) Job.cpp:0:0
#30 0x00005559dd711d7e llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (clang+0x3679d7e)
#31 0x00005559de1cd2a3 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const (clang+0x41352a3)
#32 0x00005559de18eadc clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (clang+0x40f6adc)
#33 0x00005559de18ecf7 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (clang+0x40f6cf7)
#34 0x00005559de1a8578 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (clang+0x4110578)
#35 0x00005559dc3c6130 clang_main(int, char**, llvm::ToolContext const&) (clang+0x232e130)
#36 0x00005559dc3d6247 main (clang+0x233e247)
#37 0x00007f2cc3027635 (/usr/lib/libc.so.6+0x27635)
#38 0x00007f2cc30276e9 __libc_start_main (/usr/lib/libc.so.6+0x276e9)
#39 0x00005559dc3c42e5 _start (clang+0x232c2e5)
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
ClangBuiltLinux clang version 22.0.0git (https://github.com/llvm/llvm-project.git 097f1e7625966673b881df63a241f755317b0bb9)
...

cvise spits out:

int arch_test_bit_cc, nouveau_fence_wait_legacy_fence; long jiffies, nouveau_fence_wait_legacy_wait; long nouveau_fence_wait_legacy() { long t = jiffies, timeout = nouveau_fence_wait_legacy_wait; while (nouveau_fence_wait_legacy_fence) asm("" : "=@cc"(arch_test_bit_cc)); return timeout - t; }

$ clang --target=s390x-linux-gnu -c -o /dev/null nouveau_fence.i

$ clang --target=s390x-linux-gnu -O2 -c -o /dev/null nouveau_fence.i

Machine code for function nouveau_fence_wait_legacy: NoPHIs, TracksLiveness, TiedOpsRewritten

Function Live Ins: $cc

bb.0.entry: successors: %bb.3(0x30000000), %bb.1(0x50000000); %bb.3(37.50%), %bb.1(62.50%) liveins: $cc %0:addr64bit = LARL @nouveau_fence_wait_legacy_fence CHSI %0:addr64bit, 0, 0, implicit-def $cc :: (dereferenceable load (s32) from @nouveau_fence_wait_legacy_fence, !tbaa !4) BRC 14, 8, %bb.3, implicit $cc J %bb.1

bb.1.while.body.lr.ph: ; predecessors: %bb.0 successors: %bb.2(0x80000000); %bb.2(100.00%)

INLINEASM &"" [maystore] [attdialect], $0:[regdef:GR32Bit], def dead %1:gr32bit, !8 %2:gr32bit = IPM implicit $cc %3:gr32bit = COPY %2:gr32bit %3:gr32bit = SRL %3:gr32bit(tied-def 0), $noreg, 28 STRL %3:gr32bit, @arch_test_bit_cc :: (store (s32) into @arch_test_bit_cc, !tbaa !4)

bb.2.while.body: ; predecessors: %bb.1, %bb.2 successors: %bb.2(0x80000000); %bb.2(100.00%)

J %bb.2

bb.3.while.end: ; predecessors: %bb.0

%4:gr64bit = LGRL @nouveau_fence_wait_legacy_wait :: (dereferenceable load (s64) from @nouveau_fence_wait_legacy_wait, !tbaa !9) %5:addr64bit = LARL @jiffies %6:gr64bit = COPY %4:gr64bit %6:gr64bit = nsw SG %6:gr64bit(tied-def 0), %5:addr64bit, 0, $noreg, implicit-def dead $cc :: (dereferenceable load (s64) from @jiffies, !tbaa !9) $r2d = COPY %6:gr64bit Return implicit killed $r2d

End machine code for function nouveau_fence_wait_legacy.

*** Bad machine code: Using an undefined physical register ***

  • function: nouveau_fence_wait_legacy
  • basic block: %bb.1 while.body.lr.ph (0x55dba4f72718)
  • instruction: %2:gr32bit = IPM implicit $cc
  • operand 1: implicit $cc

fatal error: error in backend: Found 1 machine code errors. PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script. Stack dump:
  1.  Program arguments: clang --target=s390x-linux-gnu -O2 -c -o /dev/null nouveau_fence.i
    
  2.  <eof> parser at end of file
    
  3.  Code generation
    
  4.  Running pass 'Function Pass Manager' on module 'nouveau_fence.i'.
    
  5.  Running pass 'Live Interval Analysis' on function '@nouveau_fence_wait_legacy'
    

#0 0x000055db9b16c2b8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (clang-22+0x36f42b8) #1 0x000055db9b169a35 llvm::sys::RunSignalHandlers() (clang-22+0x36f1a35) #2 0x000055db9b0ebb97 (anonymous namespace)::CrashRecoveryContextImpl::HandleCrash(int, unsigned long) CrashRecoveryContext.cpp:0:0 #3 0x000055db9b0ebb2f llvm::CrashRecoveryContext::HandleExit(int) (clang-22+0x3673b2f) #4 0x000055db9b1664f7 llvm::sys::Process::Exit(int, bool) (clang-22+0x36ee4f7) #5 0x000055db99da40c6 (clang-22+0x232c0c6) #6 0x000055db9b0f1f56 llvm::report_fatal_error(llvm::Twine const&, bool) (clang-22+0x3679f56) #7 0x000055db9a783e5e (clang-22+0x2d0be5e) #8 0x000055db9a7844c6 llvm::MachineFunction::verify(llvm::Pass*, char const*, llvm::raw_ostream*, bool) const (clang-22+0x2d0c4c6) #9 0x000055db9a61d5c2 llvm::LiveRangeCalc::findReachingDefs(llvm::LiveRange&, llvm::MachineBasicBlock&, llvm::SlotIndex, llvm::Register, llvm::ArrayRefllvm::SlotIndex) (clang-22+0x2ba55c2) #10 0x000055db9a61c85b llvm::LiveRangeCalc::extend(llvm::LiveRange&, llvm::SlotIndex, llvm::Register, llvm::ArrayRefllvm::SlotIndex) (clang-22+0x2ba485b) #11 0x000055db9a620629 llvm::LiveIntervalCalc::extendToUses(llvm::LiveRange&, llvm::Register, llvm::LaneBitmask, llvm::LiveInterval*) (clang-22+0x2ba8629) #12 0x000055db9a6087b3 llvm::LiveIntervals::computeRegUnitRange(llvm::LiveRange&, unsigned int) (clang-22+0x2b907b3) #13 0x000055db9a607b1f llvm::LiveIntervals::computeLiveInRegUnits() (clang-22+0x2b8fb1f) #14 0x000055db9a60646f llvm::LiveIntervals::analyze(llvm::MachineFunction&) (clang-22+0x2b8e46f) #15 0x000055db9a60621c llvm::LiveIntervalsWrapperPass::runOnMachineFunction(llvm::MachineFunction&) (clang-22+0x2b8e21c) #16 0x000055db9a6b9903 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (clang-22+0x2c41903) #17 0x000055db9ac52a48 llvm::FPPassManager::runOnFunction(llvm::Function&) (clang-22+0x31daa48) #18 0x000055db9ac5a162 llvm::FPPassManager::runOnModule(llvm::Module&) (clang-22+0x31e2162) #19 
0x000055db9ac53430 llvm::legacy::PassManagerImpl::run(llvm::Module&) (clang-22+0x31db430) #20 0x000055db9b8c473e clang::emitBackendOutput(clang::CompilerInstance&, clang::CodeGenOptions&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtrllvm::vfs::FileSystem, std::unique_ptr<llvm::raw_pwrite_stream, std::default_deletellvm::raw_pwrite_stream>, clang::BackendConsumer*) (clang-22+0x3e4c73e) #21 0x000055db9b8d9f68 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (clang-22+0x3e61f68) #22 0x000055db9cee0559 clang::ParseAST(clang::Sema&, bool, bool) (clang-22+0x5468559) #23 0x000055db9bdd6b16 clang::FrontendAction::Execute() (clang-22+0x435eb16) #24 0x000055db9bd3f77d clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (clang-22+0x42c777d) #25 0x000055db9beaec05 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (clang-22+0x4436c05) #26 0x000055db99da3a07 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (clang-22+0x232ba07) #27 0x000055db99d9f7f5 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&, llvm::IntrusiveRefCntPtrllvm::vfs::FileSystem) driver.cpp:0:0 #28 0x000055db99da1ddd int llvm::function_ref<int (llvm::SmallVectorImpl<char const*>&)>::callback_fn<clang_main(int, char**, llvm::ToolContext const&)::$_0>(long, llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0 #29 0x000055db9bba4b69 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optionalllvm::StringRef>, std::__cxx11::basic_string<char, std::char_traits, std::allocator>, bool) const::$_0>(long) Job.cpp:0:0 #30 0x000055db9b0ebace llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (clang-22+0x3673ace) #31 0x000055db9bba43a3 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optionalllvm::StringRef>, std::__cxx11::basic_string<char, std::char_traits, std::allocator>, bool) const (clang-22+0x412c3a3) #32 0x000055db9bb65b2c 
clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (clang-22+0x40edb2c) #33 0x000055db9bb65d47 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (clang-22+0x40edd47) #34 0x000055db9bb7f5f8 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (clang-22+0x41075f8) #35 0x000055db99d9f0b0 clang_main(int, char**, llvm::ToolContext const&) (clang-22+0x23270b0) #36 0x000055db99daf1d7 main (clang-22+0x23371d7) #37 0x00007f45f4827635 (/usr/lib/libc.so.6+0x27635) #38 0x00007f45f48276e9 __libc_start_main (/usr/lib/libc.so.6+0x276e9) #39 0x000055db99d9d265 _start (clang-22+0x2325265) clang: error: clang frontend command failed with exit code 70 (use -v to see invocation) ClangBuiltLinux clang version 22.0.0git (https://github.com/llvm/llvm-project.git<https://github.com/llvm/llvm-project.git > 6712e20c5261376a6b0015fb3c8d15124757d47d) ...

— Reply to this email directly, view it on GitHub<https://github.com/llvm/llvm-project/pull/125970#issuecomment-3407355089 >, or unsubscribe<https://github.com/notifications/unsubscribe-auth/BM5K4GVKOQXZW3RVFNCGEED3XZZ5DAVCNFSM6AAAAABWSJVEQKVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZTIMBXGM2TKMBYHE >. You are receiving this because you authored the thread.Message ID: @.***>

anoopkg6 avatar Oct 16 '25 13:10 anoopkg6

I am seeing a crash while building the Linux kernel for ARCH=s390 after this change.

Should now be fixed by https://github.com/llvm/llvm-project/pull/165274

uweigand avatar Oct 27 '25 17:10 uweigand