Revert "[RISCV] Shrink vslideup's LMUL when lowering fixed insert_sub…
Browse files Browse the repository at this point in the history
…vector (#65997)"

This reverts commit b5ff71e.  As described in
#68730, this appears to have exposed
an existing liveness issue.  Revert to green until we can figure out how to
address the root cause.

Note: This was not a clean revert.  I ended up doing it by hand.
preames committed Oct 10, 2023
1 parent 2e59b75 commit 3a6cc52
Showing 4 changed files with 244 additions and 228 deletions.
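For context, the reverted change shrank the LMUL used for the vslideup when lowering a fixed-length insert into a scalable vector. Below is a minimal IR sketch of the kind of input affected, reconstructed from the insert_nxv8i32_v8i32_8 test in the first test file shown further down (the declare line is added here and assumed to follow that file's intrinsic mangling):

declare <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)

; Insert a fixed <8 x i32> at element index 8 of a scalable <vscale x 8 x i32>.
define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %svp) {
  %sv = load <8 x i32>, ptr %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
  ret <vscale x 8 x i32> %v
}

With the reverted patch the vslideup for such inserts could run at a reduced LMUL; after this revert it runs at the full container LMUL again, which is why the vsetivli lines in the tests below move from m1/m2 to m4/m8.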
17 changes: 0 additions & 17 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8887,17 +8887,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }

    // Shrink down Vec so we're performing the slideup on a smaller LMUL.
    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
    MVT OrigContainerVT = ContainerVT;
    SDValue OrigVec = Vec;
    if (auto ShrunkVT =
            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }

    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
@@ -8924,12 +8913,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                           SlideupAmt, Mask, VL, Policy);
    }

    // If we performed the slideup on a smaller LMUL, insert the result back
    // into the rest of the vector.
    if (ContainerVT != OrigContainerVT)
      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                           SubVec, DAG.getVectorIdxConstant(0, DL));

    if (VecVT.isFixedLengthVector())
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), SubVec);
45 changes: 24 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -14,7 +14,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -27,7 +27,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 2
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -40,7 +40,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 6
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -51,19 +51,22 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v12, (a0)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; LMULMAX2-NEXT: vmv.v.v v8, v12
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vle32.v v12, (a1)
; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; LMULMAX1-NEXT: vle32.v v8, (a0)
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; LMULMAX1-NEXT: vslideup.vi v8, v12, 4
; LMULMAX1-NEXT: vle32.v v12, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v16, (a0)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma
; LMULMAX1-NEXT: vmv.v.v v8, v12
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; LMULMAX1-NEXT: vslideup.vi v8, v16, 4
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
@@ -81,14 +84,14 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vle32.v v12, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v12, (a1)
; LMULMAX1-NEXT: vle32.v v16, (a0)
; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; LMULMAX1-NEXT: vslideup.vi v8, v12, 8
; LMULMAX1-NEXT: vslideup.vi v8, v16, 8
; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, ma
; LMULMAX1-NEXT: vslideup.vi v8, v16, 12
; LMULMAX1-NEXT: vslideup.vi v8, v12, 12
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
@@ -163,7 +166,7 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v10, (a0)
; LMULMAX2-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; LMULMAX2-NEXT: vmv.v.v v10, v8
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -194,7 +197,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v10, (a0)
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; LMULMAX2-NEXT: vslideup.vi v10, v8, 2
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -505,9 +508,9 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, <vscale x 16 x i64>* %o
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 4
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, ma
; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: ret
%sv0 = load <2 x i64>, ptr %psv0
@@ -536,7 +539,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, <vscale x 16 x i64>* %out) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, ma
; CHECK-NEXT: vslideup.vi v16, v8, 2
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ret
80 changes: 40 additions & 40 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -27,13 +27,13 @@ define void @widen_3xv4i16(ptr %x, ptr %z) {
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a2, a0, 8
; CHECK-NEXT: vle16.v v9, (a2)
; CHECK-NEXT: vle16.v v10, (a2)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 8
; CHECK-NEXT: vslideup.vi v8, v12, 8
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
@@ -75,17 +75,17 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0)
; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8
; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a2)
; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16
; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2)
; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16
; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a2)
; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24
; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a0)
; CHECK-NO-MISALIGN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v9, 4
; CHECK-NO-MISALIGN-NEXT: vle8.v v14, (a0)
; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 4
; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 12, e16, m2, tu, ma
; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 8
; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 8
; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 12
; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v14, 12
; CHECK-NO-MISALIGN-NEXT: vse16.v v8, (a1)
; CHECK-NO-MISALIGN-NEXT: ret
;
@@ -188,17 +188,17 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) {
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a2, a0, 2
; CHECK-NEXT: vle16.v v9, (a2)
; CHECK-NEXT: addi a2, a0, 6
; CHECK-NEXT: vle16.v v10, (a2)
; CHECK-NEXT: addi a2, a0, 6
; CHECK-NEXT: vle16.v v12, (a2)
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 8
; CHECK-NEXT: vslideup.vi v8, v12, 8
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v12, 12
; CHECK-NEXT: vslideup.vi v8, v14, 12
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
@@ -258,17 +258,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: vle16.v v9, (a0)
; RV32-NEXT: add a0, a0, a4
; RV32-NEXT: vle16.v v10, (a0)
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a0, a0, a4
; RV32-NEXT: vle16.v v12, (a0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV32-NEXT: vslideup.vi v8, v9, 4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: vle16.v v14, (a0)
; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 4
; RV32-NEXT: vsetivli zero, 12, e16, m2, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 8
; RV32-NEXT: vslideup.vi v8, v12, 8
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v12, 12
; RV32-NEXT: vslideup.vi v8, v14, 12
; RV32-NEXT: vse16.v v8, (a1)
; RV32-NEXT: ret
;
@@ -277,17 +277,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: vle16.v v9, (a0)
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: vle16.v v10, (a0)
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: vle16.v v12, (a0)
; RV64-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64-NEXT: vslideup.vi v8, v9, 4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: vle16.v v14, (a0)
; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64-NEXT: vslideup.vi v8, v10, 4
; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma
; RV64-NEXT: vslideup.vi v8, v10, 8
; RV64-NEXT: vslideup.vi v8, v12, 8
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v12, 12
; RV64-NEXT: vslideup.vi v8, v14, 12
; RV64-NEXT: vse16.v v8, (a1)
; RV64-NEXT: ret
;
@@ -296,17 +296,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
; ZVE64F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVE64F-NEXT: vle16.v v8, (a0)
; ZVE64F-NEXT: add a0, a0, a2
; ZVE64F-NEXT: vle16.v v9, (a0)
; ZVE64F-NEXT: add a0, a0, a3
; ZVE64F-NEXT: vle16.v v10, (a0)
; ZVE64F-NEXT: add a0, a0, a2
; ZVE64F-NEXT: add a0, a0, a3
; ZVE64F-NEXT: vle16.v v12, (a0)
; ZVE64F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; ZVE64F-NEXT: vslideup.vi v8, v9, 4
; ZVE64F-NEXT: add a0, a0, a2
; ZVE64F-NEXT: vle16.v v14, (a0)
; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; ZVE64F-NEXT: vslideup.vi v8, v10, 4
; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma
; ZVE64F-NEXT: vslideup.vi v8, v10, 8
; ZVE64F-NEXT: vslideup.vi v8, v12, 8
; ZVE64F-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVE64F-NEXT: vslideup.vi v8, v12, 12
; ZVE64F-NEXT: vslideup.vi v8, v14, 12
; ZVE64F-NEXT: vse16.v v8, (a1)
; ZVE64F-NEXT: ret
%a = load <4 x i16>, ptr %x