llvm-project [InstSimplify] Fix incorrect poison propagation when folding phi

We can only replace phi(X, undef) with X, if X is known not to be poison. Otherwise, the result may be more poisonous on the undef branch.

Jun 25 '24 13:06 nikic

@llvm/pr-subscribers-backend-hexagon @llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-powerpc

@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes

We can only replace phi(X, undef) with X, if X is known not to be poison. Otherwise, the result may be more poisonous on the undef branch.

Patch is 23.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96631.diff

13 Files Affected:

(modified) llvm/lib/Analysis/InstructionSimplify.cpp (+8-1)
(modified) llvm/test/CodeGen/AArch64/convertphitype.ll (+3-2)
(modified) llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll (+21-21)
(modified) llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll (+1-1)
(modified) llvm/test/CodeGen/Hexagon/trunc-mpy.ll (+1-1)
(modified) llvm/test/CodeGen/PowerPC/sms-phi-5.ll (+1-1)
(modified) llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll (+4-3)
(modified) llvm/test/Transforms/InstCombine/phi.ll (+2-1)
(modified) llvm/test/Transforms/InstSimplify/phi.ll (+2-1)
(modified) llvm/test/Transforms/LoopDeletion/pr53969.ll (+1-1)
(modified) llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll (+12-12)
(modified) llvm/test/Transforms/LoopVectorize/pr31190.ll (+2-2)
(modified) llvm/test/Transforms/LoopVectorize/uniform-blend.ll (+10-10)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index a452add93a7e7..c5fea8f93ef93 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5278,7 +5278,14 @@ static Value *simplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
     // If we have a PHI node like phi(X, undef, X), where X is defined by some
     // instruction, we cannot return X as the result of the PHI node unless it
     // dominates the PHI block.
-    return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
+    if (!valueDominatesPHI(CommonValue, PN, Q.DT))
+      return nullptr;
+
+    // Make sure we do not replace an undef value with poison.
+    if (HasUndefInput &&
+        !isGuaranteedNotToBePoison(CommonValue, Q.AC, Q.CxtI, Q.DT))
+      return nullptr;
+    return CommonValue;
   }
 
   return CommonValue;
diff --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll
index b723b470266a7..e690653237a2d 100644
--- a/llvm/test/CodeGen/AArch64/convertphitype.ll
+++ b/llvm/test/CodeGen/AArch64/convertphitype.ll
@@ -341,6 +341,7 @@ define float @convphi_loopdelayed(ptr %s, ptr %d, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, ptr [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -349,8 +350,8 @@ define float @convphi_loopdelayed(ptr %s, ptr %d, i64 %n) {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[LS]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LS_BC]], [[LOOP]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
   %cmp15 = icmp sgt i64 %n, 0
diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
index 5b69b68552a4d..3645af3ccaee0 100644
--- a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
+++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
@@ -33,7 +33,7 @@ define <vscale x 32 x i8> @wide_32i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 32 x i8> undef
+  ret <vscale x 32 x i8> poison
 L2:
   ret <vscale x 32 x i8> %illegal
 }
@@ -61,7 +61,7 @@ define <vscale x 16 x i16> @wide_16i16(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x i16> undef
+  ret <vscale x 16 x i16> poison
 L2:
   ret <vscale x 16 x i16> %illegal
 }
@@ -89,7 +89,7 @@ define <vscale x 8 x i32> @wide_8i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x i32> undef
+  ret <vscale x 8 x i32> poison
 L2:
   ret <vscale x 8 x i32> %illegal
 }
@@ -117,7 +117,7 @@ define <vscale x 4 x i64> @wide_4i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 4 x i64> undef
+  ret <vscale x 4 x i64> poison
 L2:
   ret <vscale x 4 x i64> %illegal
 }
@@ -145,7 +145,7 @@ define <vscale x 16 x half> @wide_16f16(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x half> undef
+  ret <vscale x 16 x half> poison
 L2:
   ret <vscale x 16 x half> %illegal
 }
@@ -173,7 +173,7 @@ define <vscale x 8 x float> @wide_8f32(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x float> undef
+  ret <vscale x 8 x float> poison
 L2:
   ret <vscale x 8 x float> %illegal
 }
@@ -201,7 +201,7 @@ define <vscale x 4 x double> @wide_4f64(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 4 x double> undef
+  ret <vscale x 4 x double> poison
 L2:
   ret <vscale x 4 x double> %illegal
 }
@@ -237,7 +237,7 @@ define <vscale x 48 x i8> @wide_48i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 48 x i8> undef
+  ret <vscale x 48 x i8> poison
 L2:
   ret <vscale x 48 x i8> %illegal
 }
@@ -269,7 +269,7 @@ define <vscale x 24 x i16> @wide_24i16(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 24 x i16> undef
+  ret <vscale x 24 x i16> poison
 L2:
   ret <vscale x 24 x i16> %illegal
 }
@@ -301,7 +301,7 @@ define <vscale x 12 x i32> @wide_12i32(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 12 x i32> undef
+  ret <vscale x 12 x i32> poison
 L2:
   ret <vscale x 12 x i32> %illegal
 }
@@ -333,7 +333,7 @@ define <vscale x 6 x i64> @wide_6i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 6 x i64> undef
+  ret <vscale x 6 x i64> poison
 L2:
   ret <vscale x 6 x i64> %illegal
 }
@@ -365,7 +365,7 @@ define <vscale x 24 x half> @wide_24f16(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 24 x half> undef
+  ret <vscale x 24 x half> poison
 L2:
   ret <vscale x 24 x half> %illegal
 }
@@ -397,7 +397,7 @@ define <vscale x 12 x float> @wide_12f32(i1 %b, <vscale x 16 x i8> %legal, <vsca
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 12 x float> undef
+  ret <vscale x 12 x float> poison
 L2:
   ret <vscale x 12 x float> %illegal
 }
@@ -429,7 +429,7 @@ define <vscale x 6 x double> @wide_6f64(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 6 x double> undef
+  ret <vscale x 6 x double> poison
 L2:
   ret <vscale x 6 x double> %illegal
 }
@@ -469,7 +469,7 @@ define <vscale x 64 x i8> @wide_64i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 64 x i8> undef
+  ret <vscale x 64 x i8> poison
 L2:
   ret <vscale x 64 x i8> %illegal
 }
@@ -505,7 +505,7 @@ define <vscale x 32 x i16> @wide_32i16(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 32 x i16> undef
+  ret <vscale x 32 x i16> poison
 L2:
   ret <vscale x 32 x i16> %illegal
 }
@@ -541,7 +541,7 @@ define <vscale x 16 x i32> @wide_16i32(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x i32> undef
+  ret <vscale x 16 x i32> poison
 L2:
   ret <vscale x 16 x i32> %illegal
 }
@@ -577,7 +577,7 @@ define <vscale x 8 x i64> @wide_8i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x i64> undef
+  ret <vscale x 8 x i64> poison
 L2:
   ret <vscale x 8 x i64> %illegal
 }
@@ -613,7 +613,7 @@ define <vscale x 32 x half> @wide_32f16(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 32 x half> undef
+  ret <vscale x 32 x half> poison
 L2:
   ret <vscale x 32 x half> %illegal
 }
@@ -649,7 +649,7 @@ define <vscale x 16 x float> @wide_16f32(i1 %b, <vscale x 16 x i8> %legal, <vsca
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x float> undef
+  ret <vscale x 16 x float> poison
 L2:
   ret <vscale x 16 x float> %illegal
 }
@@ -685,7 +685,7 @@ define <vscale x 8 x double> @wide_8f64(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x double> undef
+  ret <vscale x 8 x double> poison
 L2:
   ret <vscale x 8 x double> %illegal
 }
diff --git a/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll b/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll
index 861a54f893f0f..8b10305497503 100644
--- a/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll
+++ b/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll
@@ -34,7 +34,7 @@ for.body:                                         ; preds = %for.body, %entry
   %optr.0102 = phi ptr [ %incdec.ptr24.3, %for.body ], [ %p3, %entry ]
   %iptr4.0101 = phi ptr [ %incdec.ptr23.3, %for.body ], [ %incdec.ptr18, %entry ]
   %iptr3.0100 = phi ptr [ %incdec.ptr22.3, %for.body ], [ %incdec.ptr17, %entry ]
-  %iptr2.099 = phi ptr [ undef, %for.body ], [ %incdec.ptr16, %entry ]
+  %iptr2.099 = phi ptr [ poison, %for.body ], [ %incdec.ptr16, %entry ]
   %iptr1.098 = phi ptr [ %incdec.ptr20.3, %for.body ], [ %incdec.ptr15, %entry ]
   %iptr0.097 = phi ptr [ %incdec.ptr19.3, %for.body ], [ %incdec.ptr, %entry ]
   %dVsumv1.096 = phi <32 x i32> [ %60, %for.body ], [ undef, %entry ]
diff --git a/llvm/test/CodeGen/Hexagon/trunc-mpy.ll b/llvm/test/CodeGen/Hexagon/trunc-mpy.ll
index ee2a85008019e..ea132495cde85 100644
--- a/llvm/test/CodeGen/Hexagon/trunc-mpy.ll
+++ b/llvm/test/CodeGen/Hexagon/trunc-mpy.ll
@@ -69,7 +69,7 @@ b1:                                               ; preds = %b0
 
 b2:                                               ; preds = %b2, %b1
   %v2 = phi ptr [ %v0, %b1 ], [ %v14, %b2 ]
-  %v3 = phi ptr [ %v1, %b1 ], [ undef, %b2 ]
+  %v3 = phi ptr [ %v1, %b1 ], [ poison, %b2 ]
   %v4 = phi ptr [ null, %b1 ], [ %v6, %b2 ]
   %v5 = load i32, ptr %v4, align 4
   %v6 = getelementptr inbounds i32, ptr %v4, i32 2
diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-5.ll b/llvm/test/CodeGen/PowerPC/sms-phi-5.ll
index 5a4aa8fdfbf39..5dcfdf5cc8124 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-5.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-5.ll
@@ -35,7 +35,7 @@ define void @phi5() unnamed_addr {
   %4 = phi i16 [ %18, %3 ], [ undef, %1 ]
   %5 = phi i16 [ %13, %3 ], [ undef, %1 ]
   %6 = phi i16 [ %11, %3 ], [ undef, %1 ]
-  %7 = phi i16 [ undef, %3 ], [ %2, %1 ]
+  %7 = phi i16 [ poison, %3 ], [ %2, %1 ]
   %8 = phi i32 [ %19, %3 ], [ undef, %1 ]
   %9 = lshr i16 %6, 1
   %10 = shl i16 %7, 15
diff --git a/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll b/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll
index 856fc37620499..4833d22de07b5 100644
--- a/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll
@@ -47,17 +47,18 @@ define i8 @test_pr52023(i1 %c.1, i1 %c.2) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP_1:%.*]]
 ; CHECK:       loop.1:
-; CHECK-NEXT:    [[INC79:%.*]] = phi i8 [ [[TMP0:%.*]], [[LOOP_1_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0]] = add i8 [[INC79]], 1
+; CHECK-NEXT:    [[INC79:%.*]] = phi i8 [ [[INC_LCSSA:%.*]], [[LOOP_1_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[INC79]], 1
 ; CHECK-NEXT:    br label [[LOOP_2:%.*]]
 ; CHECK:       loop.2:
 ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[LOOP_2_LATCH:%.*]], label [[LOOP_1_LATCH]]
 ; CHECK:       loop.2.latch:
 ; CHECK-NEXT:    br label [[LOOP_1_LATCH]]
 ; CHECK:       loop.1.latch:
+; CHECK-NEXT:    [[INC_LCSSA]] = phi i8 [ [[TMP0]], [[LOOP_2_LATCH]] ], [ undef, [[LOOP_2]] ]
 ; CHECK-NEXT:    br i1 [[C_2:%.*]], label [[EXIT:%.*]], label [[LOOP_1]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[INC_LCSSA_LCSSA:%.*]] = phi i8 [ [[TMP0]], [[LOOP_1_LATCH]] ]
+; CHECK-NEXT:    [[INC_LCSSA_LCSSA:%.*]] = phi i8 [ [[INC_LCSSA]], [[LOOP_1_LATCH]] ]
 ; CHECK-NEXT:    ret i8 [[INC_LCSSA_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index b12982dd27e40..0ffd7b237e762 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -93,9 +93,10 @@ define i32 @test5_undef(i32 %A, i1 %cond) {
 ; CHECK-NEXT:  BB0:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       Loop:
+; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[A:%.*]], [[BB0:%.*]] ], [ undef, [[LOOP]] ]
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       Exit:
-; CHECK-NEXT:    ret i32 [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[B]]
 ;
 BB0:
   br label %Loop
diff --git a/llvm/test/Transforms/InstSimplify/phi.ll b/llvm/test/Transforms/InstSimplify/phi.ll
index 5cc3fecb129f4..496a3c896944b 100644
--- a/llvm/test/Transforms/InstSimplify/phi.ll
+++ b/llvm/test/Transforms/InstSimplify/phi.ll
@@ -81,7 +81,8 @@ define i32 @undef(i1 %cond, i32 %v) {
 ; CHECK:       B:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       EXIT:
-; CHECK-NEXT:    ret i32 [[V:%.*]]
+; CHECK-NEXT:    [[W:%.*]] = phi i32 [ [[V:%.*]], [[A]] ], [ undef, [[B]] ]
+; CHECK-NEXT:    ret i32 [[W]]
 ;
   br i1 %cond, label %A, label %B
 A:
diff --git a/llvm/test/Transforms/LoopDeletion/pr53969.ll b/llvm/test/Transforms/LoopDeletion/pr53969.ll
index f9b9dc3373b5e..3b8904e4457f8 100644
--- a/llvm/test/Transforms/LoopDeletion/pr53969.ll
+++ b/llvm/test/Transforms/LoopDeletion/pr53969.ll
@@ -44,7 +44,7 @@ define void @test() {
 ; CHECK-NEXT:    br i1 false, label [[BB11]], label [[BB12:%.*]]
 ; CHECK:       bb42:
 ; CHECK-NEXT:    [[TMP24_LCSSA:%.*]] = phi i32 [ undef, [[BB22]] ]
-; CHECK-NEXT:    [[TMP18_LCSSA4:%.*]] = phi i64 [ 0, [[BB22]] ]
+; CHECK-NEXT:    [[TMP18_LCSSA4:%.*]] = phi i64 [ undef, [[BB22]] ]
 ; CHECK-NEXT:    store atomic i64 [[TMP18_LCSSA4]], ptr addrspace(1) undef unordered, align 8
 ; CHECK-NEXT:    call void @use(i32 [[TMP24_LCSSA]])
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index c4aa6c7725d41..b0b8234b18380 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -156,29 +156,29 @@ define fastcc void @test3(ptr nocapture %u) nounwind uwtable ssp {
 ; CHECK:       for.inc8.us.i:
 ; CHECK-NEXT:    br i1 true, label [[MESHBB1_LOOPEXIT:%.*]], label [[MESHBB:%.*]]
 ; CHECK:       for.body3.us.i:
-; CHECK-NEXT:    [[INDVARS_IV_I_SV_PHI:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_I:%.*]], [[MESHBB]] ], [ 0, [[FOR_BODY3_LR_PH_US_I:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[MESHBB]] ], [ [[TMP2:%.*]], [[FOR_BODY3_LR_PH_US_I:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[MESHBB]] ], [ [[U:%.*]], [[FOR_BODY3_LR_PH_US_I]] ]
 ; CHECK-NEXT:    [[OPQ_SA_CALC12:%.*]] = sub i32 undef, 227
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], [[INDVARS_IV_I_SV_PHI]]
-; CHECK-NEXT:    [[TMP:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[MUL_I_US_I:%.*]] = mul nsw i32 0, [[TMP]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[INDVARS_IV_I_SV_PHI]], 3
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[U:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[SCEVGEP]], align 8
+; CHECK-NEXT:    [[MUL_I_US_I:%.*]] = mul nsw i32 0, [[LSR_IV1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[LSR_IV]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[FOR_INC8_US_I:%.*]], label [[MESHBB]]
 ; CHECK:       for.body3.lr.ph.us.i.loopexit:
-; CHECK-NEXT:    [[LSR_IV_NEXT:%.*]] = add i64 [[LSR_IV]], 1
 ; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH_US_I]]
 ; CHECK:       for.body3.lr.ph.us.i:
-; CHECK-NEXT:    [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY3_LR_PH_US_I_LOOPEXIT:%.*]] ], [ undef, [[MESHBB1]] ]
-; CHECK-NEXT:    [[ARRAYIDX_US_I:%.*]] = getelementptr inbounds double, ptr undef, i64 [[LSR_IV]]
+; CHECK-NEXT:    [[INDVARS_IV8_I_SV_PHI26:%.*]] = phi i64 [ poison, [[MESHBB1]] ], [ [[INDVARS_IV8_I_SV_PHI24:%.*]], [[FOR_BODY3_LR_PH_US_I_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX_US_I:%.*]] = getelementptr inbounds double, ptr undef, i64 [[INDVARS_IV8_I_SV_PHI26]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDVARS_IV8_I_SV_PHI26]], 1
+; CHECK-NEXT:    [[TMP2]] = trunc i64 [[INDVARS_IV8_I_SV_PHI26]] to i32
 ; CHECK-NEXT:    br label [[FOR_BODY3_US_I:%.*]]
 ; CHECK:       for.inc8.us.i2:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       eval_At_times_u.exit:
 ; CHECK-NEXT:    ret void
 ; CHECK:       meshBB:
+; CHECK-NEXT:    [[INDVARS_IV8_I_SV_PHI24]] = phi i64 [ undef, [[FOR_BODY3_US_I]] ], [ [[TMP1]], [[FOR_INC8_US_I]] ]
 ; CHECK-NEXT:    [[MESHSTACKVARIABLE_PHI:%.*]] = phi i32 [ [[OPQ_SA_CALC12]], [[FOR_BODY3_US_I]] ], [ undef, [[FOR_INC8_US_I]] ]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_I]] = add i64 [[INDVARS_IV_I_SV_PHI]], 1
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 8
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV1]], 1
 ; CHECK-NEXT:    br i1 true, label [[FOR_BODY3_LR_PH_US_I_LOOPEXIT]], label [[FOR_BODY3_US_I]]
 ; CHECK:       meshBB1.loopexit:
 ; CHECK-NEXT:    br label [[MESHBB1]]
@@ -206,7 +206,7 @@ for.body3.us.i:                                   ; preds = %meshBB, %for.body3.
   br i1 undef, label %for.inc8.us.i, label %meshBB
 
 for.body3.lr.ph.us.i:                             ; preds = %meshBB1, %meshBB
-  %indvars.iv8.i.SV.phi26 = phi i64 [ undef, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ]
+  %indvars.iv8.i.SV.phi26 = phi i64 [ poison, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ]
   %arrayidx.us.i = getelementptr inbounds double, ptr undef, i64 %indvars.iv8.i.SV.phi26
   %3 = add i64 %indvars.iv8.i.SV.phi26, 1
   br label %for.body3.us.i
diff --git a/llvm/test/Transforms/LoopVectorize/pr31190.ll b/llvm/test/Transforms/LoopVectorize/pr31190.ll
index b6cacf1a7fe87..c4f1f9c5d8f0c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr31190.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr31190.ll
@@ -5,7 +5,7 @@
 ; is a SCEV AddRec with respect to an outer loop.
 
 ; In this case, the problematic PHI is:
-; %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
+; %0 = phi i32 [ poison, %for.cond1.preheader ], [ %inc54, %for.body3 ]
 ; Since %inc54 is the IV of the outer loop, and %0 equivalent to it,
 ; we get the situation described above.
 
@@ -47,7 +47,7 @@ for.cond1.preheader:                              ; preds = %for.cond1.for.inc4_
 
 for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
   %inc1 = phi i32 [ %inc.lcssa3, %for.cond1.preheader ], [ %inc, %for.body3 ]
-  %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
+  %0 = phi i32 [ poison, %for.cond1.preheader ], [ %inc54, %for.body3 ]
   %idxprom = sext i32 %0 to i64
   %arrayidx = getelementptr inbounds [1 x i32], ptr @b, i64 0, i64 %idxprom
   store i32 4, ptr %arrayidx, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index c9fc8beb006d9..ecc1ae817b687 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -14,9 +14,9 @@ define void @blend_uniform_iv_trunc(i1 %c) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i16
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], 0
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[C]], i16 [[TMP1]], i16 undef
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[PREDPHI]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[C]], i16 [[TMP1]], i16 poison
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[TMP6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
 ; CHE...
[truncated]

Jun 25 '24 13:06 llvmbot

See https://github.com/llvm/llvm-project/issues/68683#issuecomment-1948117173.

Can you provide some performance data on rust benchmarks like this? https://perf.rust-lang.org/compare.html?start=af2525317be950fdae635bcbb46b3e755d14ab49&end=0e3235f85b0a83843fb2397a3345ff3830b408d4&stat=instructions:u

Jun 25 '24 13:06 dtcxzyw

@dtcxzyw Could you check this on llvm-opt-benchmark first? I'm wondering whether this introduces regressions that can be recovered based on more context.

The motivation for doing this now after ignoring this for so long is that we're definitely going to lose this when we stop using undef for uninit loads, so I'd like to know what the impact of just this part is going to be.

Jun 25 '24 15:06 nikic

@dtcxzyw Could you check this on llvm-opt-benchmark first? I'm wondering whether this introduces regressions that can be recovered based on more context.

The motivation for doing this now after ignoring this for so long is that we're definitely going to lose this when we stop using undef for uninit loads, so I'd like to know what the impact of just this part is going to be.

Done.

Jun 25 '24 16:06 dtcxzyw

@dtcxzyw Thanks! I think the main two takeaways are:

I suspect that we need a nopoison attribute (and I guess !nopoison metadata). The current noundef is too strong, and in Rust often cannot be applied because e.g. parts of enums are only conditionally initialized. However, I believe that all frontend-generated arguments/returns/loads can be nopoison, because Rust does not expose poison in its semantics (and I think this holds for pretty much all other frontends as well). Having nopoison would mean that pretty much all "roots" can be known not to be poison. Once undef is removed (or at least not allowed to be referenced from inside functions) we would drop noundef and only have nopoison.
We should probably be folding select c, undef, x to freeze(x) if we can't determine that x is non-poison. Otherwise we may carry a complex select condition around all the way to the backend. Having the freeze is not ideal, but I think it's the better tradeoff. (The same kind of applies to phis after this change, but it's probably less important there because the condition is still used by the branch.)

Jun 25 '24 18:06 nikic

RFC for nopoison: https://discourse.llvm.org/t/rfc-add-nopoison-attribute-metadata/79833 The select -> freeze fold: https://github.com/llvm/llvm-project/pull/96773

On a somewhat tangential note, I'm also wondering whether it would be worthwhile to add some flags on freeze so you can have freeze %x (freezes undef and poison -- used e.g. for despeculation), freeze undef %x (freezes undef only -- used for multi-use) and freeze poison %x (freezes poison only -- used e.g. for the select fold).

Jun 26 '24 15:06 nikic

On a somewhat tangential note, I'm also wondering whether it would be worthwhile to add some flags on freeze so you can have freeze %x (freezes undef and poison -- used e.g. for despeculation), freeze undef %x (freezes undef only -- used for multi-use) and freeze poison %x (freezes poison only -- used e.g. for the select fold).

As we will finally get rid of undef, I don't think the (potential) benefit is worth the maintenance burden.

Jun 26 '24 15:06 dtcxzyw

Moving this to draft as it looks like we're still not ready for this change :(

Jun 27 '24 10:06 nikic

Unfortunately, we just found the first instance where this miscompile occurs in the wild (https://github.com/llvm/llvm-project/issues/97702).

Per the comments of @RalfJung in https://discourse.llvm.org/t/rfc-add-nopoison-attribute-metadata/79833 it looks like Rust does not want to use nopoison attributes, which basically leaves us no choice but to move forward with this as-is and have Rust eat the resulting codegen regressions.

Jul 04 '24 09:07 nikic

For Rust, in the future we'd like to have poison semantics for our values, and then we cannot use nopoison. It would be a shame if LLVM wouldn't let its frontends have poison semantics for values without codegen regressions. Alternatives exist, as mentioned in my comment.

Today, Rust doesn't yet have values with poison semantics (it is regularly requested but we are blocked on lack of support in LLVM, among other things). So today, nopoison could be added in Rust. So if this is some sort of transition strategy that's perfectly fine, but it is not a great long-term solution. My understanding is that undef is being transitioned out, so given that the counterexample needs undef, does that mean things will get better when undef is gone entirely? There will be no more phi(X, undef), only phi(X, poison), and those can always be replaced by X.

Put differently, in Rust we already have to be careful to not use poison, and that is tracked here. So adding nopoison everywhere doesn't make things any worse now, it just makes them more explicit. However our hope is that we can in the future start using poison semantics...

Jul 04 '24 10:07 RalfJung

Sorry, my initial analysis of https://github.com/llvm/llvm-project/issues/97702 wasn't right, and this wasn't the cause of that issue after all. So this one remains a theoretical problem only for now...

Jul 04 '24 10:07 nikic

@nikic Can you provide some performance data on rust benchmarks before landing this patch?

Jul 04 '24 15:07 dtcxzyw

@nikic Can you provide some performance data on rust benchmarks before landing this patch?

I'm adding this test.

Jul 05 '24 00:07 dianqk

@nikic Can you provide some performance data on rust benchmarks before landing this patch?

Based on the known test set, this has no noticeable impact: https://github.com/rust-lang/rust/pull/127347#issuecomment-2210100734.

Jul 05 '24 04:07 dianqk

As far as I'm aware, this issue is still theoretical, but it does regularly cause confusion and duplicate issue reports, so I'm inclined to merge it.

Nov 07 '24 11:11 nikic

LLVM Buildbot has detected a new failure on builder lldb-remote-linux-ubuntu running on as-builder-9 while building llvm at step 16 "test-check-lldb-api".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/741

Here is the relevant piece of the build log for the reference

Step 16 (test-check-lldb-api) failure: Test just built components: check-lldb-api completed (failure)
...
PASS: lldb-api :: types/TestIntegerType.py (1200 of 1209)
PASS: lldb-api :: types/TestIntegerTypeExpr.py (1201 of 1209)
PASS: lldb-api :: types/TestRecursiveTypes.py (1202 of 1209)
PASS: lldb-api :: types/TestShortType.py (1203 of 1209)
PASS: lldb-api :: types/TestLongTypes.py (1204 of 1209)
PASS: lldb-api :: types/TestShortTypeExpr.py (1205 of 1209)
PASS: lldb-api :: types/TestLongTypesExpr.py (1206 of 1209)
PASS: lldb-api :: tools/lldb-server/TestNonStop.py (1207 of 1209)
PASS: lldb-api :: tools/lldb-server/TestLldbGdbServer.py (1208 of 1209)
TIMEOUT: lldb-api :: python_api/process/cancel_attach/TestCancelAttach.py (1209 of 1209)
******************** TEST 'lldb-api :: python_api/process/cancel_attach/TestCancelAttach.py' FAILED ********************
Script:
--
/usr/bin/python3.12 /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin --libcxx-include-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include/c++/v1 --libcxx-include-target-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include/aarch64-unknown-linux-gnu/c++/v1 --libcxx-library-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib/aarch64-unknown-linux-gnu --arch aarch64 --build-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/lldb --compiler /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang --dsymutil /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/dsymutil --make /usr/bin/make --llvm-tools-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin --lldb-obj-root /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/tools/lldb --lldb-libs-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib --platform-url connect://jetson-agx-2198.lab.llvm.org:1234 --platform-working-dir /home/ubuntu/lldb-tests --sysroot /mnt/fs/jetson-agx-ubuntu --env ARCH_CFLAGS=-mcpu=cortex-a78 --platform-name remote-linux /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/python_api/process/cancel_attach -p TestCancelAttach.py
--
Exit Code: -9
Timeout: Reached timeout of 600 seconds

Command Output (stdout):
--
lldb version 20.0.0git (https://github.com/llvm/llvm-project.git revision dd116369f646d023a2e7e5c145a1bed5dcf9a45c)
  clang revision dd116369f646d023a2e7e5c145a1bed5dcf9a45c
  llvm revision dd116369f646d023a2e7e5c145a1bed5dcf9a45c

--
Command Output (stderr):
--
WARNING:root:Custom libc++ is not supported for remote runs: ignoring --libcxx arguments
FAIL: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_scripted_implementation (TestCancelAttach.AttachCancelTestCase.test_scripted_implementation)

--

********************
********************
Timed Out Tests (1):
  lldb-api :: python_api/process/cancel_attach/TestCancelAttach.py


Testing Time: 927.76s

Total Discovered Tests: 1209
  Unsupported      : 427 (35.32%)
  Passed           : 764 (63.19%)
  Expectedly Failed:  17 (1.41%)
  Timed Out        :   1 (0.08%)
FAILED: tools/lldb/test/API/CMakeFiles/check-lldb-api /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/tools/lldb/test/API/CMakeFiles/check-lldb-api 
cd /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/tools/lldb/test/API && /usr/bin/python3.12 /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/llvm-lit -vv -v -vv --threads=8 /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/tools/lldb/test/API
ninja: build stopped: subcommand failed.

Nov 07 '24 13:11 llvm-ci