[RISCV] Teach vsetvli insertion to remember when predecessors have same AVL and SEW/LMUL ratio if their VTYPEs otherwise mismatch.

Previously we went directly to the unknown state on a VTYPE mismatch.
If we instead remember the partial match, we can still use the X0, X0
form of vsetvli in successors when the AVL and the needed SEW/LMUL
ratio match.
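
For context, the X0, X0 form (vsetvli zero, zero, <vtype>) rewrites VTYPE
while keeping the current VL, which is only safe when the new VTYPE leaves
VLMAX unchanged, i.e. when the SEW/LMUL ratio is preserved. A minimal sketch
of the pattern this enables, mirroring the new cross-block test added below
(e32/m1 and e16/mf2 both have a SEW/LMUL ratio of 32):

    # Predecessor 1 leaves VTYPE = e32,m1; predecessor 2 leaves e16,mf2.
    # Both ratios are 32, so VLMAX (and hence VL) is unchanged, and the
    # join block may rewrite VTYPE without re-reading the AVL:
    vsetvli zero, zero, e32, m1, ta, mu
    vadd.vv v8, v25, v8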

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D104069
topperc committed Jun 18, 2021
1 parent 8c2c972 commit ac87133
Showing 3 changed files with 134 additions and 8 deletions.
49 changes: 43 additions & 6 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -58,10 +58,12 @@ class VSETVLIInfo {
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t MaskRegOp : 1;
uint8_t SEWLMULRatioOnly : 1;

public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false) {}
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
SEWLMULRatioOnly(false) {}

static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@@ -127,16 +129,20 @@ class VSETVLIInfo {
}

unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() &&
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
}

bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

bool hasSameVTYPE(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!isUnknown() && !Other.isUnknown() &&
"Can't compare VTYPE in unknown state");
assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
"Can't compare when only LMUL/SEW ratio is valid.");
return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
Other.MaskAgnostic);
@@ -172,10 +178,16 @@ class VSETVLIInfo {
bool isCompatible(const VSETVLIInfo &InstrInfo) const {
assert(isValid() && InstrInfo.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!InstrInfo.SEWLMULRatioOnly &&
"Expected a valid VTYPE for instruction!");
// Nothing is compatible with Unknown.
if (isUnknown() || InstrInfo.isUnknown())
return false;

// If only the SEW/LMUL ratio is valid, then this isn't compatible.
if (SEWLMULRatioOnly)
return false;

// If the instruction doesn't need an AVLReg and the SEW matches, consider
// it compatible.
if (InstrInfo.hasAVLReg() && InstrInfo.AVLReg == RISCV::NoRegister) {
@@ -209,8 +221,19 @@ class VSETVLIInfo {
if (Other.isUnknown())
return isUnknown();

// Otherwise compare the VTYPE and AVL.
return hasSameVTYPE(Other) && hasSameAVL(Other);
if (!hasSameAVL(Other))
return false;

// If only the VLMAX is valid, check that it is the same.
if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
return hasSameVLMAX(Other);

// If the full VTYPE is valid, check that it is the same.
if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
return hasSameVTYPE(Other);

// If the SEWLMULRatioOnly bits are different, then they aren't equal.
return false;
}

// Calculate the VSETVLIInfo visible to a block assuming this and Other are
@@ -224,10 +247,23 @@ class VSETVLIInfo {
if (!isValid())
return Other;

// If either is unknown, the result is unknown.
if (isUnknown() || Other.isUnknown())
return VSETVLIInfo::getUnknown();

// If we have an exact match, return this.
if (*this == Other)
return *this;

// If the configurations don't match, assume unknown.
// Not an exact match, but maybe the AVL and VLMAX are the same. If so,
// return an SEW/LMUL ratio only value.
if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
VSETVLIInfo MergeInfo = *this;
MergeInfo.SEWLMULRatioOnly = true;
return MergeInfo;
}

// Otherwise the result is unknown.
return VSETVLIInfo::getUnknown();
}

@@ -444,7 +480,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) {
Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
Require.hasSameVTYPE(CurInfo)) {
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
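
Viewed on its own, the new merge is a small lattice over (AVL, VTYPE): exact
match, then AVL plus SEW/LMUL ratio only, then unknown. Below is a minimal
standalone C++ sketch of the equality and intersect logic above (SimpleInfo
is an illustrative stand-in, not the LLVM class; tail/mask-agnostic bits are
omitted, and LMUL is scaled by 8 so fractional LMULs stay integral):

#include <cstdint>

// Illustrative stand-in for VSETVLIInfo. The state is either invalid
// (bottom of the lattice), unknown (top), a full (AVL, SEW, LMUL) triple,
// or an AVL plus SEW/LMUL ratio only.
struct SimpleInfo {
  bool Valid = false;
  bool Unknown = false;
  uint64_t AVL = 0;
  unsigned SEW = 0;    // element width in bits: 8, 16, 32, 64
  unsigned LMUL8 = 0;  // LMUL * 8: mf2 -> 4, m1 -> 8, m2 -> 16, ...
  bool RatioOnly = false;

  static SimpleInfo unknown() {
    SimpleInfo I;
    I.Valid = I.Unknown = true;
    return I;
  }

  // VLMAX = VLEN * LMUL / SEW, so equal SEW/LMUL ratios imply equal VLMAX.
  bool hasSameVLMAX(const SimpleInfo &O) const {
    return SEW * O.LMUL8 == O.SEW * LMUL8;
  }

  bool equals(const SimpleInfo &O) const {
    if (Unknown || O.Unknown)
      return Unknown == O.Unknown;
    if (AVL != O.AVL)
      return false;
    if (RatioOnly && O.RatioOnly)   // both partial: ratios must agree
      return hasSameVLMAX(O);
    if (!RatioOnly && !O.RatioOnly) // both full: compare the whole VTYPE
      return SEW == O.SEW && LMUL8 == O.LMUL8;
    return false;                   // mixed partial/full never compare equal
  }

  SimpleInfo intersect(const SimpleInfo &O) const {
    if (!O.Valid)
      return *this;                 // uninitialized predecessors are ignored
    if (!Valid)
      return O;
    if (Unknown || O.Unknown)
      return unknown();
    if (equals(O))
      return *this;                 // exact match
    if (AVL == O.AVL && hasSameVLMAX(O)) {
      SimpleInfo M = *this;         // remember the partial match instead of
      M.RatioOnly = true;           // dropping straight to unknown
      return M;
    }
    return unknown();
  }
};

Merging e32,m1 with e16,mf2 under the same AVL then yields a ratio-only
state rather than unknown, which is what the second new test below relies
on at if2.end.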
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -584,7 +584,7 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
; RV32-NEXT: vslideup.vi v25, v26, 1
; RV32-NEXT: .LBB8_4: # %else2
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; RV32-NEXT: vse32.v v25, (a1)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
@@ -644,7 +644,7 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
; RV64-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
; RV64-NEXT: vslideup.vi v25, v26, 1
; RV64-NEXT: .LBB8_4: # %else2
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; RV64-NEXT: vse32.v v25, (a1)
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
89 changes: 89 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -496,3 +496,92 @@ declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* nocapture, i64)
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64)
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)

; We need a vsetvli in the last block because the predecessors have different
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX,
; so we don't need to read the AVL and can keep VL unchanged.
define <vscale x 2 x i32> @test_vsetvli_x0_x0(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: andi a0, a3, 1
; CHECK-NEXT: beqz a0, .LBB9_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v26, (a1)
; CHECK-NEXT: vwadd.vx v8, v26, zero
; CHECK-NEXT: .LBB9_2: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vadd.vv v8, v25, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
br i1 %cond, label %if, label %if.end

if:
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16> %b, i16 0, i64 %vl)
br label %if.end

if.end:
%d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
%e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
ret <vscale x 2 x i32> %e
}
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>*, i64)
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>*, i64)
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16>, i16, i64)
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64)

; We can use the X0, X0 form of vsetvli in if2 and if2.end. The merge point
; at if.end sees two different vtypes with the same SEW/LMUL ratio. At if2.end
; we therefore know only the SEW/LMUL ratio for the if.end predecessor, but
; the full vtype for the if2 predecessor. This makes sure we can merge a
; ratio-only predecessor with a predecessor whose vtype is fully known.
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i16>* %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: andi a0, a4, 1
; CHECK-NEXT: beqz a0, .LBB10_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v26, (a1)
; CHECK-NEXT: vwadd.wv v25, v25, v26
; CHECK-NEXT: .LBB10_2: # %if.end
; CHECK-NEXT: andi a0, a5, 1
; CHECK-NEXT: beqz a0, .LBB10_4
; CHECK-NEXT: # %bb.3: # %if2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v26, (a2)
; CHECK-NEXT: vwadd.wv v25, v25, v26
; CHECK-NEXT: .LBB10_4: # %if2.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vadd.vv v8, v25, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
br i1 %cond, label %if, label %if.end

if:
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
br label %if.end

if.end:
%d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
br i1 %cond2, label %if2, label %if2.end

if2:
%e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %z, i64 %vl)
%f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
br label %if2.end

if2.end:
%g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
%h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
ret <vscale x 2 x i32> %h
}
declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i16>, i64)
