Skip to content

Commit

Permalink
[InstSimplify] fold FP rounding intrinsic with rounded operand
Browse files Browse the repository at this point in the history
issue #56775

I rearranged the Thumb2 codegen test to avoid simplifying the chain
of rounding instructions. I'm assuming the intent of the test is
to verify lowering of each of those intrinsics.
  • Loading branch information
rotateright committed Jul 31, 2022
1 parent ba29549 commit 02b3a35
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 127 deletions.
47 changes: 30 additions & 17 deletions llvm/lib/Analysis/InstructionSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5573,6 +5573,24 @@ static bool isIdempotent(Intrinsic::ID ID) {
}
}

/// Return true if the intrinsic rounds a floating-point value to an integral
/// floating-point value (not an integer type).
static bool removesFPFraction(Intrinsic::ID ID) {
  // These are exactly the FP->FP rounding intrinsics: each maps its operand
  // to a value with no fractional part (or propagates inf/nan), so applying
  // any of them to an already-integral FP value is a no-op.
  return ID == Intrinsic::floor || ID == Intrinsic::ceil ||
         ID == Intrinsic::trunc || ID == Intrinsic::rint ||
         ID == Intrinsic::nearbyint || ID == Intrinsic::round ||
         ID == Intrinsic::roundeven;
}

static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
const DataLayout &DL) {
GlobalValue *PtrSym;
Expand Down Expand Up @@ -5638,6 +5656,18 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
if (II->getIntrinsicID() == IID)
return II;

if (removesFPFraction(IID)) {
// Converting from int or calling a rounding function always results in a
// finite integral number or infinity. For those inputs, rounding functions
// always return the same value, so the (2nd) rounding is eliminated. Ex:
// floor (sitofp x) -> sitofp x
// round (ceil x) -> ceil x
auto *II = dyn_cast<IntrinsicInst>(Op0);
if ((II && removesFPFraction(II->getIntrinsicID())) ||
match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value())))
return Op0;
}

Value *X;
switch (IID) {
case Intrinsic::fabs:
Expand Down Expand Up @@ -5695,23 +5725,6 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), m_Value(X))))
return X;
break;
case Intrinsic::floor:
case Intrinsic::trunc:
case Intrinsic::ceil:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::nearbyint:
case Intrinsic::rint: {
// floor (sitofp x) -> sitofp x
// floor (uitofp x) -> uitofp x
//
// Converting from int always results in a finite integral number or
// infinity. For either of those inputs, these rounding functions always
// return the same value, so the rounding can be eliminated.
if (match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value())))
return Op0;
break;
}
case Intrinsic::experimental_vector_reverse:
// experimental.vector.reverse(experimental.vector.reverse(x)) -> x
if (match(Op0,
Expand Down
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
Original file line number Diff line number Diff line change
Expand Up @@ -578,46 +578,46 @@ entry:
define arm_aapcs_vfpcc <8 x half> @ext_fpintrinsics_trunc_half(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: ext_fpintrinsics_trunc_half:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vcvtb.f32.f16 q2, q0
; CHECK-NEXT: vcvtb.f32.f16 q4, q1
; CHECK-NEXT: vabs.f32 q3, q2
; CHECK-NEXT: vcvtt.f32.f16 q0, q0
; CHECK-NEXT: vrintm.f32 q3, q2
; CHECK-NEXT: vrintx.f32 q5, q4
; CHECK-NEXT: vabs.f32 q3, q3
; CHECK-NEXT: vrinta.f32 q4, q4
; CHECK-NEXT: vminnm.f32 q3, q3, q2
; CHECK-NEXT: vrintp.f32 q2, q2
; CHECK-NEXT: vmaxnm.f32 q3, q3, q5
; CHECK-NEXT: vcvtt.f32.f16 q0, q0
; CHECK-NEXT: vfma.f32 q2, q3, q4
; CHECK-NEXT: vrintm.f32 q3, q0
; CHECK-NEXT: vabs.f32 q3, q3
; CHECK-NEXT: vcvtt.f32.f16 q1, q1
; CHECK-NEXT: vmaxnm.f32 q3, q3, q4
; CHECK-NEXT: vfma.f32 q4, q3, q2
; CHECK-NEXT: vabs.f32 q3, q0
; CHECK-NEXT: vminnm.f32 q3, q3, q0
; CHECK-NEXT: vrintp.f32 q2, q4
; CHECK-NEXT: vmaxnm.f32 q3, q3, q1
; CHECK-NEXT: vrintm.f32 q2, q2
; CHECK-NEXT: vfma.f32 q1, q3, q0
; CHECK-NEXT: vrintx.f32 q2, q2
; CHECK-NEXT: vrintp.f32 q0, q1
; CHECK-NEXT: vrinta.f32 q2, q2
; CHECK-NEXT: vrintm.f32 q0, q0
; CHECK-NEXT: vrintx.f32 q4, q1
; CHECK-NEXT: vmaxnm.f32 q3, q3, q4
; CHECK-NEXT: vrinta.f32 q1, q1
; CHECK-NEXT: vrintp.f32 q0, q0
; CHECK-NEXT: vrintz.f32 q2, q2
; CHECK-NEXT: vrintx.f32 q0, q0
; CHECK-NEXT: vrinta.f32 q0, q0
; CHECK-NEXT: vfma.f32 q0, q3, q1
; CHECK-NEXT: vrintz.f32 q1, q0
; CHECK-NEXT: vcvtb.f16.f32 q0, q2
; CHECK-NEXT: vcvtt.f16.f32 q0, q1
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
entry:
%sa = fpext <8 x half> %a to <8 x float>
%sb = fpext <8 x half> %b to <8 x float>
%abs = call <8 x float> @llvm.fabs.v8f32(<8 x float> %sa)
%floor = call <8 x float> @llvm.floor.v8f32(<8 x float> %sa)
%rint = call <8 x float> @llvm.rint.v8f32(<8 x float> %sb)
%ceil = call <8 x float> @llvm.ceil.v8f32(<8 x float> %sa)
%round = call <8 x float> @llvm.round.v8f32(<8 x float> %sb)
%abs = call <8 x float> @llvm.fabs.v8f32(<8 x float> %floor)
%min = call <8 x float> @llvm.minnum.v8f32(<8 x float> %abs, <8 x float> %sa)
%max = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %min, <8 x float> %sb)
%fma = call <8 x float> @llvm.fma.v8f32(<8 x float> %max, <8 x float> %sa, <8 x float> %sb)
%ceil = call <8 x float> @llvm.ceil.v8f32(<8 x float> %fma)
%floor = call <8 x float> @llvm.floor.v8f32(<8 x float> %ceil)
%rint = call <8 x float> @llvm.rint.v8f32(<8 x float> %floor)
%round = call <8 x float> @llvm.round.v8f32(<8 x float> %rint)
%trunc = call <8 x float> @llvm.trunc.v8f32(<8 x float> %round)
%max = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %min, <8 x float> %rint)
%fma = call <8 x float> @llvm.fma.v8f32(<8 x float> %max, <8 x float> %round, <8 x float> %ceil)
%trunc = call <8 x float> @llvm.trunc.v8f32(<8 x float> %fma)
%t = fptrunc <8 x float> %trunc to <8 x half>
ret <8 x half> %t
}
Expand Down
Loading

0 comments on commit 02b3a35

Please sign in to comment.