From d87eceda0e6d5de6b2d58430a0124b6f7428695d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 25 May 2019 16:44:29 +0000 Subject: [PATCH] [X86] Combine fminnum/fmaxnum with non-nan operand to fmin/fmax If one operand is known to be non-NaN, place it in the second operand of fmin/fmax, because the second operand is the one returned if either operand is NaN. Differential Revision: https://reviews.llvm.org/D62448 llvm-svn: 361704 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++-- llvm/test/CodeGen/X86/extract-fp.ll | 20 +-------- llvm/test/CodeGen/X86/fmaxnum.ll | 60 +++++-------------------- llvm/test/CodeGen/X86/fminnum.ll | 60 +++++-------------------- 4 files changed, 29 insertions(+), 121 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 246e494de782c5..e124b7d6c07952 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40511,9 +40511,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // TODO: If an operand is already known to be a NaN or not a NaN, this - // should be an optional swap and FMAX/FMIN. - EVT VT = N->getValueType(0); if (!((Subtarget.hasSSE1() && VT == MVT::f32) || (Subtarget.hasSSE2() && VT == MVT::f64) || @@ -40530,6 +40527,13 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()) return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + // If one of the operands is known non-NaN use the native min/max instructions + // with the non-NaN input as second operand. + if (DAG.isKnownNeverNaN(Op1)) + return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + if (DAG.isKnownNeverNaN(Op0)) + return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags()); + // If we have to respect NaN inputs, this takes at least 3 instructions. 
// Favor a library call when operating on a scalar and minimizing code size. if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize()) diff --git a/llvm/test/CodeGen/X86/extract-fp.ll b/llvm/test/CodeGen/X86/extract-fp.ll index ac5a43d046c1e6..27430efa7822c3 100644 --- a/llvm/test/CodeGen/X86/extract-fp.ll +++ b/llvm/test/CodeGen/X86/extract-fp.ll @@ -86,16 +86,8 @@ define float @ext_frem_v4f32_constant_op0(<4 x float> %x) { define float @ext_maxnum_v4f32(<4 x float> %x) nounwind { ; CHECK-LABEL: ext_maxnum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: cmpunordss %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm1, %xmm3 -; CHECK-NEXT: andps %xmm2, %xmm3 -; CHECK-NEXT: maxss %xmm0, %xmm2 -; CHECK-NEXT: andnps %xmm2, %xmm1 -; CHECK-NEXT: orps %xmm3, %xmm1 -; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: maxss {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> ) %r = extractelement <4 x float> %v, i32 2 @@ -105,16 +97,8 @@ define float @ext_maxnum_v4f32(<4 x float> %x) nounwind { define double @ext_minnum_v2f64(<2 x double> %x) nounwind { ; CHECK-LABEL: ext_minnum_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: cmpunordsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm3 -; CHECK-NEXT: andpd %xmm2, %xmm3 -; CHECK-NEXT: minsd %xmm0, %xmm2 -; CHECK-NEXT: andnpd %xmm2, %xmm1 -; CHECK-NEXT: orpd %xmm3, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: minsd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) %r = extractelement <2 x double> %v, i32 1 diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll index cfe77f47db0fb4..e308412f7cada6 100644 --- 
a/llvm/test/CodeGen/X86/fmaxnum.ll +++ b/llvm/test/CodeGen/X86/fmaxnum.ll @@ -472,33 +472,13 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double define float @test_maxnum_const_op1(float %x) { ; SSE-LABEL: test_maxnum_const_op1: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: cmpunordss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: andps %xmm2, %xmm3 -; SSE-NEXT: maxss %xmm0, %xmm2 -; SSE-NEXT: andnps %xmm2, %xmm1 -; SSE-NEXT: orps %xmm3, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: maxss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: test_maxnum_const_op1: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX512-LABEL: test_maxnum_const_op1: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 -; AVX512-NEXT: retq +; AVX-LABEL: test_maxnum_const_op1: +; AVX: # %bb.0: +; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %r = call float @llvm.maxnum.f32(float 1.0, float %x) ret float %r } @@ -506,33 +486,13 @@ define float @test_maxnum_const_op1(float %x) { define float @test_maxnum_const_op2(float %x) { ; SSE-LABEL: test_maxnum_const_op2: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: cmpunordss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: andps %xmm2, %xmm3 -; SSE-NEXT: maxss %xmm0, %xmm2 -; SSE-NEXT: andnps %xmm2, %xmm1 -; SSE-NEXT: orps %xmm3, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: maxss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: 
test_maxnum_const_op2: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX512-LABEL: test_maxnum_const_op2: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 -; AVX512-NEXT: retq +; AVX-LABEL: test_maxnum_const_op2: +; AVX: # %bb.0: +; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %r = call float @llvm.maxnum.f32(float %x, float 1.0) ret float %r } diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll index bbf48deeebcf11..33accf2e49c1f3 100644 --- a/llvm/test/CodeGen/X86/fminnum.ll +++ b/llvm/test/CodeGen/X86/fminnum.ll @@ -472,33 +472,13 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float> define float @test_minnum_const_op1(float %x) { ; SSE-LABEL: test_minnum_const_op1: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: cmpunordss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: andps %xmm2, %xmm3 -; SSE-NEXT: minss %xmm0, %xmm2 -; SSE-NEXT: andnps %xmm2, %xmm1 -; SSE-NEXT: orps %xmm3, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: minss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: test_minnum_const_op1: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX512-LABEL: test_minnum_const_op1: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 -; 
AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 -; AVX512-NEXT: retq +; AVX-LABEL: test_minnum_const_op1: +; AVX: # %bb.0: +; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %r = call float @llvm.minnum.f32(float 1.0, float %x) ret float %r } @@ -506,33 +486,13 @@ define float @test_minnum_const_op1(float %x) { define float @test_minnum_const_op2(float %x) { ; SSE-LABEL: test_minnum_const_op2: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: cmpunordss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: andps %xmm2, %xmm3 -; SSE-NEXT: minss %xmm0, %xmm2 -; SSE-NEXT: andnps %xmm2, %xmm1 -; SSE-NEXT: orps %xmm3, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: minss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: test_minnum_const_op2: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX512-LABEL: test_minnum_const_op2: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 -; AVX512-NEXT: retq +; AVX-LABEL: test_minnum_const_op2: +; AVX: # %bb.0: +; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %r = call float @llvm.minnum.f32(float %x, float 1.0) ret float %r }