llvm-project [DAGCombiner] Fix assertion failure in vector division lowering

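Recent fixes in division legalization trip the type-legality assertion when NewNodesMustHaveLegalTypes is set: after type legalization, BuildSDIV and BuildUDIV could still create the constants for a vector division using an illegal scalar type. This patch creates those constants with the promoted scalar type instead (extending the magic values to the promoted width) and bails out if the transformed type would be narrower than the original element type.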

Dec 15 '25 15:12 SavchenkoValeriy

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: Valeriy Savchenko (SavchenkoValeriy)

Changes

Recent fixes in division legalization trip the legality assertion when NewNodesMustHaveLegalTypes is set.

Full diff: https://github.com/llvm/llvm-project/pull/172321.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+31-4)
(added) llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll (+86)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bf26fec287636..69120114fb78f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6559,6 +6559,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   if (N->getFlags().hasExact())
     return BuildExactSDIV(*this, N, dl, DAG, Created);
 
+  // If we're after type legalization and SVT is not legal, use the
+  // promoted type for creating constants to avoid creating nodes with
+  // illegal types.
+  if (IsAfterLegalTypes && VT.isVector()) {
+    SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
+    if (SVT.bitsLT(VT.getScalarType()))
+      return SDValue();
+    ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
+    if (ShSVT.bitsLT(ShVT.getScalarType()))
+      return SDValue();
+  }
+  const unsigned SVTBits = SVT.getSizeInBits();
+
   SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
 
   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
@@ -6585,7 +6598,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
       NumeratorFactor = -1;
     }
 
-    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
+    MagicFactors.push_back(
+        DAG.getConstant(magics.Magic.sext(SVTBits), dl, SVT));
     Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
     Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
     ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
@@ -6736,6 +6750,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   // avoid expensive fixups.
   unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
 
+  // If we're after type legalization and SVT is not legal, use the
+  // promoted type for creating constants to avoid creating nodes with
+  // illegal types.
+  if (IsAfterLegalTypes && VT.isVector()) {
+    SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
+    if (SVT.bitsLT(VT.getScalarType()))
+      return SDValue();
+    ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
+    if (ShSVT.bitsLT(ShVT.getScalarType()))
+      return SDValue();
+  }
+  const unsigned SVTBits = SVT.getSizeInBits();
+
   bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
 
@@ -6758,7 +6785,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
           UnsignedDivisionByConstantInfo::get(
               Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
 
-      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
+      MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
 
       assert(magics.PreShift < Divisor.getBitWidth() &&
              "We shouldn't generate an undefined shift!");
@@ -6769,8 +6796,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
       PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
       PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
       NPQFactor = DAG.getConstant(
-          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
-                       : APInt::getZero(EltBits),
+          magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
+                       : APInt::getZero(SVTBits),
           dl, SVT);
       UseNPQ |= magics.IsAdd;
       UsePreShift |= magics.PreShift != 0;
diff --git a/llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll b/llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll
new file mode 100644
index 0000000000000..707ae0759d4f4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-pc-windows-msvc"
+
+; udiv with zext input - the zext from i8 to i16 triggers vector splitting
+; which exposes the promoted constant issue
+define <16 x i16> @udiv_v16i16_from_zext(<16 x i8> %x) {
+; CHECK-LABEL: udiv_v16i16_from_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #21846 // =0x5556
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    umull2 v3.4s, v1.8h, v2.8h
+; CHECK-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    umull2 v4.4s, v0.8h, v2.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v4.8h
+; CHECK-NEXT:    ret
+entry:
+  %zext = zext <16 x i8> %x to <16 x i16>
+  %div = udiv <16 x i16> %zext, splat (i16 3)
+  ret <16 x i16> %div
+}
+
+; sdiv with zext input
+define <16 x i16> @sdiv_v16i16_from_zext(<16 x i8> %x) {
+; CHECK-LABEL: sdiv_v16i16_from_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #21846 // =0x5556
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    umull2 v3.4s, v1.8h, v2.8h
+; CHECK-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    umull2 v4.4s, v0.8h, v2.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v4.8h
+; CHECK-NEXT:    ret
+entry:
+  %zext = zext <16 x i8> %x to <16 x i16>
+  %div = sdiv <16 x i16> %zext, splat (i16 3)
+  ret <16 x i16> %div
+}
+
+; udiv exact with zext input
+define <16 x i16> @udiv_exact_v16i16_from_zext(<16 x i8> %x) {
+; CHECK-LABEL: udiv_exact_v16i16_from_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
+; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    dup v3.8h, w8
+; CHECK-NEXT:    mul v1.8h, v0.8h, v3.8h
+; CHECK-NEXT:    mul v0.8h, v2.8h, v3.8h
+; CHECK-NEXT:    ret
+entry:
+  %zext = zext <16 x i8> %x to <16 x i16>
+  %div = udiv exact <16 x i16> %zext, splat (i16 3)
+  ret <16 x i16> %div
+}
+
+; sdiv exact with zext input
+define <16 x i16> @sdiv_exact_v16i16_from_zext(<16 x i8> %x) {
+; CHECK-LABEL: sdiv_exact_v16i16_from_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #21846 // =0x5556
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    umull2 v3.4s, v1.8h, v2.8h
+; CHECK-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    umull2 v4.4s, v0.8h, v2.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v4.8h
+; CHECK-NEXT:    ret
+entry:
+  %zext = zext <16 x i8> %x to <16 x i16>
+  %div = sdiv exact <16 x i16> %zext, splat (i16 3)
+  ret <16 x i16> %div
+}

Dec 15 '25 15:12 llvmbot

Is this ready to merge? HEAD's been broken for quite a while now.

Dec 17 '25 12:12 nico

> Is this ready to merge? HEAD's been broken for quite a while now.

I believe it is. @topperc @arsenm Can you please take another look?

Dec 17 '25 12:12 SavchenkoValeriy