[InstCombine] Transform (fcmp + fadd + sel) into (fcmp + sel + fadd) #106492

rajatbajpai · 2024-08-29T05:29:58Z

Transform fcmp + fadd + sel into fcmp + sel + fadd which enables the possibility of transforming fcmp + sel into maxnum/minnum intrinsics.

Alive2 results:
https://alive2.llvm.org/ce/z/2cmimW
https://alive2.llvm.org/ce/z/Qh9ZJt
https://alive2.llvm.org/ce/z/vtLj3R

llvmbot · 2024-08-29T05:30:30Z

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-transforms

Author: Rajat Bajpai (rajatbajpai)

Changes

Transform fcmp + fadd + sel into fcmp + sel + fadd which enables the possibility of lowering fcmp + sel into fmax/fmin.

Full diff: https://github.com/llvm/llvm-project/pull/106492.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+45)
(added) llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll (+245)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index fcd11126073bf1..17f1b3a1ec24ae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3668,6 +3668,47 @@ static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
   return false;
 }
 
+static Value *foldSelectAddConstant(SelectInst &SI,
+                                    InstCombiner::BuilderTy &Builder) {
+  Value *Cmp;
+  Instruction *FAdd;
+  ConstantFP *C;
+
+  // select((fcmp OGT/OLT, X, 0), (fadd X, C), C) => fadd((select (fcmp OGT/OLT, X, 0), X, 0), C)
+  // This transformation enables the possibility of transforming fcmp + sel into a fmax/fmin.
+
+  // OneUse check for `Cmp` is necessary because it makes sure that other InstCombine
+  // folds don't undo this transformation and cause an infinite loop.
+  if (match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_OneUse(m_Instruction(FAdd)),
+                          m_ConstantFP(C))) ||
+      match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_ConstantFP(C),
+                          m_OneUse(m_Instruction(FAdd))))) {
+    Value *X;
+    CmpInst::Predicate Pred;
+    if (!match(Cmp, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())))
+      return nullptr;
+
+    if (Pred != CmpInst::FCMP_OGT && Pred != CmpInst::FCMP_OLT)
+      return nullptr;
+
+    if (!match(FAdd, m_FAdd(m_Specific(X), m_Specific(C))))
+      return nullptr;
+
+    FastMathFlags FMF = FAdd->getFastMathFlags();
+    FMF |= SI.getFastMathFlags();
+
+    Value *NewSelect = Builder.CreateSelect(
+        Cmp, X, ConstantFP::getZero(C->getType()), SI.getName() + ".new", &SI);
+    cast<Instruction>(NewSelect)->setFastMathFlags(FMF);
+
+    Value *NewFAdd =
+        Builder.CreateFAddFMF(NewSelect, C, FAdd, FAdd->getName() + ".new");
+    return NewFAdd;
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -4067,6 +4108,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = foldSelectAddConstant(SI, Builder)) {
+    return replaceInstUsesWith(SI, V);
+  }
+
   // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
   // Load inst is intentionally not checked for hasOneUse()
   if (match(FalseVal, m_Zero()) &&
diff --git a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll
new file mode 100644
index 00000000000000..fced2d961b2415
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Check for fcmp + sel pattern which later lowered into fmax
+define float @test_fmax_pos1(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos1(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp ogt float %in, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmax_pos2(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp ogt float %in, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float 1.000000e+00, float %add
+  ret float %sel
+}
+
+define float @test_fmax_pos3(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp ogt float %in, 0.000000e+00
+  %add = fadd float 1.000000e+00, %in
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmax_pos4(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos4(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp ogt float %in, 0.000000e+00
+  %add = fadd float 1.000000e+00, %in
+  %sel = select i1 %cmp1, float 1.000000e+00, float %add
+  ret float %sel
+}
+
+define float @test_fmax_pos5(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos5(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 2.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp ogt float %in, 0.000000e+00
+  %add = fadd float 2.000000e+00, %in
+  %sel = select i1 %cmp1, float 2.000000e+00, float %add
+  ret float %sel
+}
+
+
+; Check for fcmp + sel pattern which later lowered into fmin
+define float @test_fmin_pos1(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos1(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp olt float %in, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmin_pos2(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp olt float %in, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float 1.000000e+00, float %add
+  ret float %sel
+}
+
+define float @test_fmin_pos3(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp olt float %in, 0.000000e+00
+  %add = fadd float 1.000000e+00, %in
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmin_pos4(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos4(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp olt float %in, 0.000000e+00
+  %add = fadd float 1.000000e+00, %in
+  %sel = select i1 %cmp1, float 1.000000e+00, float %add
+  ret float %sel
+}
+
+define float @test_fmin_pos5(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos5(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT:    [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 2.000000e+00
+; CHECK-NEXT:    ret float [[ADD_NEW]]
+;
+  %cmp1 = fcmp olt float %in, 0.000000e+00
+  %add = fadd float 2.000000e+00, %in
+  %sel = select i1 %cmp1, float 2.000000e+00, float %add
+  ret float %sel
+}
+
+
+; Check for fmax scenarios that shouldn't be transformed.
+define float @test_fmax_neg1(float %in, float %in2) {
+; CHECK-LABEL: define float @test_fmax_neg1(
+; CHECK-SAME: float [[IN:%.*]], float [[IN2:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN2]], 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[SEL]]
+;
+  %cmp1 = fcmp ogt float %in2, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmax_neg2(float %in) {
+; CHECK-LABEL: define float @test_fmax_neg2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[SEL]]
+;
+  %cmp1 = fcmp ogt float %in, 1.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmax_neg3(float %in) {
+; CHECK-LABEL: define float @test_fmax_neg3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[SEL_1:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT:    [[SEL_2:%.*]] = select i1 [[CMP1]], float 2.000000e+00, float [[ADD_2]]
+; CHECK-NEXT:    [[RES:%.*]] = fadd float [[SEL_1]], [[SEL_2]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %cmp1 = fcmp ogt float %in, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %add.2 = fadd float %in, 1.000000e+00
+  %sel.1 = select i1 %cmp1, float %add, float 1.000000e+00
+  %sel.2 = select i1 %cmp1, float 2.000000e+00, float %add.2
+  %res = fadd float %sel.1, %sel.2
+  ret float %res
+}
+
+
+; Check for fmin scenarios that shouldn't be transformed.
+define float @test_fmin_neg1(float %in, float %in2) {
+; CHECK-LABEL: define float @test_fmin_neg1(
+; CHECK-SAME: float [[IN:%.*]], float [[IN2:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN2]], 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[SEL]]
+;
+  %cmp1 = fcmp olt float %in2, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmin_neg2(float %in) {
+; CHECK-LABEL: define float @test_fmin_neg2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[SEL]]
+;
+  %cmp1 = fcmp olt float %in, 1.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %sel = select i1 %cmp1, float %add, float 1.000000e+00
+  ret float %sel
+}
+
+define float @test_fmin_neg3(float %in) {
+; CHECK-LABEL: define float @test_fmin_neg3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT:    [[SEL_1:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT:    [[SEL_2:%.*]] = select i1 [[CMP1]], float 2.000000e+00, float [[ADD_2]]
+; CHECK-NEXT:    [[RES:%.*]] = fadd float [[SEL_1]], [[SEL_2]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %cmp1 = fcmp olt float %in, 0.000000e+00
+  %add = fadd float %in, 1.000000e+00
+  %add.2 = fadd float %in, 1.000000e+00
+  %sel.1 = select i1 %cmp1, float %add, float 1.000000e+00
+  %sel.2 = select i1 %cmp1, float 2.000000e+00, float %add.2
+  %res = fadd float %sel.1, %sel.2
+  ret float %res
+}

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-08-29T07:47:43Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+    if (!match(FAdd, m_FAdd(m_Specific(X), m_Specific(C))))
+      return nullptr;


Just match the m_FAdd up originally, instead of matching the temporary m_Instruction above. Same with the fcmp check

Thanks for your suggestions, but I didn't do so for two reasons:

I believe it will make the original condition a little crowded.

Getting FAdd "fast-math-flags" from both conditions will become complex.

We really ought to add variants of the pattern matchers that extract the flags

I actually have a pattern matcher implementation somewhere that extracts the flags, the problem I ran into trying to use it is that our helpers for setting fast-math flags on new instructions only support an Instruction source, not a FastMathFlags variable.

Yes, agree. I think these types of pattern matchers will make handling fast-math flags a little easier.

I have merged the FCmp into the original condition and kept the FAdd as separate because of the flags.

llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-08-29T07:57:28Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+  // loop.
+  if (match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_OneUse(m_Instruction(FAdd)),
+                          m_ConstantFP(C))) ||
+      match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_ConstantFP(C),


The conditions are a bit off. You need to know the constant isn't a nan (it doesn't even really need to be a constant, you just need to know it's not nan, which you can use isKnownNeverNaN for).

Broken: https://alive2.llvm.org/ce/z/65RLiq
Fixed: https://alive2.llvm.org/ce/z/nPXs8M

There is already a transformation for the Value type.

This transformation is now performed only when the select instruction has the nnan and nsz flags. I believe in that case there is no need to check the constant for NaN.

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-08-29T08:01:49Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+    FMF |= SI.getFastMathFlags();
+
+    Value *NewSelect = Builder.CreateSelect(
+        Cmp, X, ConstantFP::getZero(C->getType()), SI.getName() + ".new", &SI);


The identity value for fadd is -0, not +0. This transform as written is incorrect for -0 (but I believe is correct if you just emit -0 here)

We will need to emit the fcmp instruction with -0 as well otherwise two things will happen:

The IC transformation will not break at the condition and cause an infinite loop.

The fcmp + sel combination will not lower into fmax/fmin. https://godbolt.org/z/7r3Mnzq41

I believe it would be safe, but I wanted to verify if there's anything I might be overlooking.

It's not useful to emit fcmp with a -0. Fcmp with 0 or -0 are exactly the same operation

I would emit the -0, and only emit the 0 with an NSZ flag

It's not useful to emit fcmp with a -0. Fcmp with 0 or -0 are exactly the same operation

I think we need to emit fcmp with -0 to break the infinite loop as mentioned above.

rajatbajpai · 2024-09-10T11:02:27Z

Gentle ping for review!

rajatbajpai · 2024-09-16T09:46:11Z

Can someone please take a look at this PR? Thanks!

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-09-24T13:10:30Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+
+    Value *NewSelect = Builder.CreateSelect(SI.getCondition(), X, Z,
+                                            SI.getName() + ".new", &SI);
+    cast<Instruction>(NewSelect)->setFastMathFlags(SI.getFastMathFlags());


This is redundant with using &SI as the FMF source in the CreateSelect above. You only need the separate flag set if you want to combine with flags from some other operations

Value* CreateSelect (Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

I believe no parameter of CreateSelect copies the FMF from the source instruction. That's why we need setFastMathFlags.

This API is a mess. Usually there's an instruction to take flags from, but CreateSelect takes metadata, I guess. It would be better if all of these had an explicit FMF parameter

Agree, it's inconsistent. 😞

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-09-30T12:33:55Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+  if (match(&SI, m_Select(m_OneUse(m_FCmp(Pred, m_Value(X), m_Value(Z))),
+                          m_OneUse(m_Instruction(FAdd)), m_Constant(C))) ||
+      match(&SI, m_Select(m_OneUse(m_FCmp(Pred, m_Value(X), m_Value(Z))),
+                          m_Constant(C), m_OneUse(m_Instruction(FAdd))))) {


I wonder if we should have an m_c_Select that handles this commuted case and returns the swapped predicate

Yes, this would make such cases much cleaner. Should we extend the pattern match as part of this change, or should we do it in a separate PR?

Separate, optional

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-09-30T12:38:38Z

llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll

+
+; fcmp OGT + fadd + sel => fcmp OGT + sel => fmaxnum
+
+define float @test_fcmp_ogt_fadd_select_constant(float %in) {


This case fails alive: https://alive2.llvm.org/ce/z/_h-8Lw

You can't preserve the nnan unless it was also present on the fadd and fcmp

This change does the below transformation, which seems to be safe https://alive2.llvm.org/ce/z/3XQb24.

define float @src_test_fcmp_ogt_fadd_select_constant(float %in) { %cmp1 = fcmp ogt float %in, 0.000000e+00 %add = fadd float %in, 1.000000e+00 %sel = select nnan nsz i1 %cmp1, float %add, float 1.000000e+00 ret float %sel }

=>

define float @tgt_test_fcmp_ogt_fadd_select_constant(float %in) { %cmp1 = fcmp ogt float %in, 0.000000e+00 %sel = select nnan nsz i1 %cmp1, float %in, float 0.000000e+00 %add = fadd float %sel, 1.000000e+00 ret float %add }

The transformation from tgt_test_fcmp_ogt_fadd_select_constant to llvm.maxnum.f32 is an existing one, as shown here https://godbolt.org/z/6n4Tfrx9e. If this transformation is incorrect, we should probably address it in a separate PR.

Yes, this needs to be fixed. The flag handling for just this can be more aggressive

@arsenm I've been working on making flags more aggressive for the above transformation. I've a question regarding the arguments of Select and FCmp instruction. Since the Select instruction already has the nnan flag, wouldn't that imply that the arguments of FCmp also have this property?

It depends. If the values are in the same block, and the values in the fcmp are a subset of the values in the use instruction

Sorry, I'm not sure I fully understand this. For this transformation, shouldn't the arguments of the fcmp and select instructions be the same? If possible, could you please provide an example where this is a problem?

arsenm · 2024-09-30T12:42:09Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+        Builder.CreateSelect(SI.getCondition(), X, Z, SI.getName(), &SI);
+    cast<Instruction>(NewSelect)->setFastMathFlags(SI.getFastMathFlags());
+
+    return Builder.CreateFAddFMF(NewSelect, C, FAdd, FAdd->getName());


I don't think you can just pass through the flags from the original fadd. I believe you need to and with the fcmp's flags

OK, playing with the above alive link, I think this wants the union of the value flags, and the intersection of the rewrite flags. We ought to have a helper directly in FMF for this case, since this is a common pattern.

So OR (union) of nsz / nnan / ninf, and AND (intersection) of reassoc, afn, contract

Sorry, I didn’t quite understand why we need to consider fcmp flags. Could you please elaborate?

The fcmp flags are relevant for the already broken select (fcmp) -> minnum case.

But separately from that, you still need to correctly merge the select and fadd's flags since you reassociate them.

I see, but without this, I cannot create a failing test case. Do you have any broken scenarios in mind?

The broken scenarios would depend on the usage context. The current guidance is the rewrite flags should be anded in transforms like this: https://llvm.org/docs/LangRef.html#rewrite-based-flags

You only need to demonstrate the rewrite flag intersection, not a behavior change as a result of them

Fixed rewrite-based flags.

dtcxzyw · 2024-10-06T03:45:29Z

@rajatbajpai Can you add alive2 links to the PR description?

arsenm · 2024-10-10T11:34:41Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+    Value *NewSelect = Builder.CreateSelect(SI.getCondition(), X, Z, "", &SI);
+    NewSelect->takeName(&SI);
+
+    Value *NewFAdd = Builder.CreateFAdd(NewSelect, C);
+    NewFAdd->takeName(FAdd);
+
+    // Propagate rewrite-based flags
+    auto SelectFMF = SI.getFastMathFlags();
+    auto FAddFMF = FAdd->getFastMathFlags();
+    FastMathFlags CommonFMF, NewFAddFMF, NewSelectFMF;
+
+    CommonFMF.setAllowReassoc(SelectFMF.allowReassoc() &&
+                              FAddFMF.allowReassoc());
+    CommonFMF.setAllowReciprocal(SelectFMF.allowReciprocal() &&
+                                 FAddFMF.allowReciprocal());
+    CommonFMF.setAllowContract(SelectFMF.allowContract() &&
+                               FAddFMF.allowContract());
+    CommonFMF.setApproxFunc(SelectFMF.approxFunc() && FAddFMF.approxFunc());
+    NewSelectFMF = NewFAddFMF = CommonFMF;
+
+    // Propagate FastMath flags
+    NewFAddFMF.setNoNaNs(FAddFMF.noNaNs());
+    NewFAddFMF.setNoInfs(FAddFMF.noInfs());
+    NewFAddFMF.setNoSignedZeros(FAddFMF.noSignedZeros());
+    cast<Instruction>(NewFAdd)->setFastMathFlags(NewFAddFMF);
+
+    NewSelectFMF.setNoNaNs(SelectFMF.noNaNs());
+    NewSelectFMF.setNoInfs(SelectFMF.noInfs());
+    NewSelectFMF.setNoSignedZeros(SelectFMF.noSignedZeros());
+    cast<Instruction>(NewSelect)->setFastMathFlags(NewSelectFMF);


This flag management is too verbose, and this is not an uncommon situation.

I think it is time to introduce (or replace) the IRBuilder Create*FMF functions with overloads that directly take a FastMathFlags parameter. There should also be one for select, which there doesn't appear to be one already.

More importantly, we need new helper functions for merging fast math flags. Most of the verbosity is from intersecting the rewrite flags. We should have some intersectRewrite and unionValue flag helpers directly in FastMathFlags.

The Create*FMF functions take an instruction from which to copy FastMathFlags.

I'll add intersectRewrite and unionValue helpers in FastMathFlags.

Yes, I know. I am saying the Create*FMF API is bad. There should be new overloads that have an explicit FMF parameter. The take from instruction API is also questionable, we should consider removing it

Understood. Thanks!

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

arsenm · 2024-10-10T11:36:05Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+    NewFAddFMF.setNoNaNs(FAddFMF.noNaNs());
+    NewFAddFMF.setNoInfs(FAddFMF.noInfs());


I think the value flags can be unioned between these (but should double check this, it might depend on the constants being known not-nan)

I see, will check.

Wouldn't the nnan guarantee suffice for the union?

llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Line 3709 in 301cc8c

if (!SIFOp || !SIFOp->hasNoSignedZeros() || !SIFOp->hasNoNaNs())

Check alive2 for nnan on each individual instruction

If the constant is nan and the select instruction doesn't have a nnan flag, then we can't do this. However, this transformation triggers only when the Select instruction has a nnan flag. https://alive2.llvm.org/ce/z/CoRFC3

Transform `fcmp + fadd + sel` into `fcmp + sel + fadd` which enables the possibility of lowering `fcmp + sel` into `fmax/fmin`.

rajatbajpai · 2024-10-23T06:22:50Z

ping @arsenm

rajatbajpai · 2024-11-06T04:59:29Z

Gentle ping for review.

arsenm · 2024-11-11T20:09:58Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+    // Propagate FastMath flags
+    FastMathFlags SelectFMF = SI.getFastMathFlags();
+    FastMathFlags FAddFMF = FAdd->getFastMathFlags();
+    FastMathFlags NewFMF = FastMathFlags::intersectRewrite(SelectFMF, FAddFMF) |
+                           FastMathFlags::unionValue(SelectFMF, FAddFMF);
+    cast<Instruction>(NewFAdd)->setFastMathFlags(NewFMF);
+    cast<Instruction>(NewSelect)->setFastMathFlags(NewFMF);


Next API cleanup should move this into the original Create* above

llvm-ci · 2024-11-11T20:21:47Z

LLVM Buildbot has detected a new failure on builder llvm-nvptx64-nvidia-ubuntu running on as-builder-7 while building llvm at step 6 "test-build-unified-tree-check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/8207