-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Transform (fcmp + fadd + sel) into (fcmp + sel + fadd) #106492
[InstCombine] Transform (fcmp + fadd + sel) into (fcmp + sel + fadd) #106492
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Rajat Bajpai (rajatbajpai) ChangesTransform Full diff: https://github.com/llvm/llvm-project/pull/106492.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index fcd11126073bf1..17f1b3a1ec24ae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3668,6 +3668,47 @@ static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
return false;
}
+static Value *foldSelectAddConstant(SelectInst &SI,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Cmp;
+ Instruction *FAdd;
+ ConstantFP *C;
+
+ // select((fcmp OGT/OLT, X, 0), (fadd X, C), C) => fadd((select (fcmp OGT/OLT, X, 0), X, 0), C)
+ // This transformation enables the possibility of transforming fcmp + sel into a fmax/fmin.
+
+ // OneUse check for `Cmp` is necessary because it makes sure that other InstCombine
+ // folds don't undo this transformation and cause an infinite loop.
+ if (match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_OneUse(m_Instruction(FAdd)),
+ m_ConstantFP(C))) ||
+ match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_ConstantFP(C),
+ m_OneUse(m_Instruction(FAdd))))) {
+ Value *X;
+ CmpInst::Predicate Pred;
+ if (!match(Cmp, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())))
+ return nullptr;
+
+ if (Pred != CmpInst::FCMP_OGT && Pred != CmpInst::FCMP_OLT)
+ return nullptr;
+
+ if (!match(FAdd, m_FAdd(m_Specific(X), m_Specific(C))))
+ return nullptr;
+
+ FastMathFlags FMF = FAdd->getFastMathFlags();
+ FMF |= SI.getFastMathFlags();
+
+ Value *NewSelect = Builder.CreateSelect(
+ Cmp, X, ConstantFP::getZero(C->getType()), SI.getName() + ".new", &SI);
+ cast<Instruction>(NewSelect)->setFastMathFlags(FMF);
+
+ Value *NewFAdd =
+ Builder.CreateFAddFMF(NewSelect, C, FAdd, FAdd->getName() + ".new");
+ return NewFAdd;
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -4067,6 +4108,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectAddConstant(SI, Builder)) {
+ return replaceInstUsesWith(SI, V);
+ }
+
// select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
// Load inst is intentionally not checked for hasOneUse()
if (match(FalseVal, m_Zero()) &&
diff --git a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll
new file mode 100644
index 00000000000000..fced2d961b2415
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Check for fcmp + sel pattern which later lowered into fmax
+define float @test_fmax_pos1(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos1(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp ogt float %in, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmax_pos2(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp ogt float %in, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float 1.000000e+00, float %add
+ ret float %sel
+}
+
+define float @test_fmax_pos3(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp ogt float %in, 0.000000e+00
+ %add = fadd float 1.000000e+00, %in
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmax_pos4(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos4(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp ogt float %in, 0.000000e+00
+ %add = fadd float 1.000000e+00, %in
+ %sel = select i1 %cmp1, float 1.000000e+00, float %add
+ ret float %sel
+}
+
+define float @test_fmax_pos5(float %in) {
+; CHECK-LABEL: define float @test_fmax_pos5(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 2.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp ogt float %in, 0.000000e+00
+ %add = fadd float 2.000000e+00, %in
+ %sel = select i1 %cmp1, float 2.000000e+00, float %add
+ ret float %sel
+}
+
+
+; Check for fcmp + sel pattern which later lowered into fmin
+define float @test_fmin_pos1(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos1(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp olt float %in, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmin_pos2(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp olt float %in, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float 1.000000e+00, float %add
+ ret float %sel
+}
+
+define float @test_fmin_pos3(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp olt float %in, 0.000000e+00
+ %add = fadd float 1.000000e+00, %in
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmin_pos4(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos4(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 1.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp olt float %in, 0.000000e+00
+ %add = fadd float 1.000000e+00, %in
+ %sel = select i1 %cmp1, float 1.000000e+00, float %add
+ ret float %sel
+}
+
+define float @test_fmin_pos5(float %in) {
+; CHECK-LABEL: define float @test_fmin_pos5(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[SEL_NEW:%.*]] = select i1 [[CMP1]], float [[IN]], float 0.000000e+00
+; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd float [[SEL_NEW]], 2.000000e+00
+; CHECK-NEXT: ret float [[ADD_NEW]]
+;
+ %cmp1 = fcmp olt float %in, 0.000000e+00
+ %add = fadd float 2.000000e+00, %in
+ %sel = select i1 %cmp1, float 2.000000e+00, float %add
+ ret float %sel
+}
+
+
+; Check for fmax scenarios that shouldn't be transformed.
+define float @test_fmax_neg1(float %in, float %in2) {
+; CHECK-LABEL: define float @test_fmax_neg1(
+; CHECK-SAME: float [[IN:%.*]], float [[IN2:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN2]], 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT: ret float [[SEL]]
+;
+ %cmp1 = fcmp ogt float %in2, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmax_neg2(float %in) {
+; CHECK-LABEL: define float @test_fmax_neg2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT: ret float [[SEL]]
+;
+ %cmp1 = fcmp ogt float %in, 1.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmax_neg3(float %in) {
+; CHECK-LABEL: define float @test_fmax_neg3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[SEL_1:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT: [[SEL_2:%.*]] = select i1 [[CMP1]], float 2.000000e+00, float [[ADD_2]]
+; CHECK-NEXT: [[RES:%.*]] = fadd float [[SEL_1]], [[SEL_2]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %cmp1 = fcmp ogt float %in, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %add.2 = fadd float %in, 1.000000e+00
+ %sel.1 = select i1 %cmp1, float %add, float 1.000000e+00
+ %sel.2 = select i1 %cmp1, float 2.000000e+00, float %add.2
+ %res = fadd float %sel.1, %sel.2
+ ret float %res
+}
+
+
+; Check for fmin scenarios that shouldn't be transformed.
+define float @test_fmin_neg1(float %in, float %in2) {
+; CHECK-LABEL: define float @test_fmin_neg1(
+; CHECK-SAME: float [[IN:%.*]], float [[IN2:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN2]], 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT: ret float [[SEL]]
+;
+ %cmp1 = fcmp olt float %in2, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmin_neg2(float %in) {
+; CHECK-LABEL: define float @test_fmin_neg2(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT: ret float [[SEL]]
+;
+ %cmp1 = fcmp olt float %in, 1.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %sel = select i1 %cmp1, float %add, float 1.000000e+00
+ ret float %sel
+}
+
+define float @test_fmin_neg3(float %in) {
+; CHECK-LABEL: define float @test_fmin_neg3(
+; CHECK-SAME: float [[IN:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[IN]], 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[IN]], 1.000000e+00
+; CHECK-NEXT: [[SEL_1:%.*]] = select i1 [[CMP1]], float [[ADD]], float 1.000000e+00
+; CHECK-NEXT: [[SEL_2:%.*]] = select i1 [[CMP1]], float 2.000000e+00, float [[ADD_2]]
+; CHECK-NEXT: [[RES:%.*]] = fadd float [[SEL_1]], [[SEL_2]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %cmp1 = fcmp olt float %in, 0.000000e+00
+ %add = fadd float %in, 1.000000e+00
+ %add.2 = fadd float %in, 1.000000e+00
+ %sel.1 = select i1 %cmp1, float %add, float 1.000000e+00
+ %sel.2 = select i1 %cmp1, float 2.000000e+00, float %add.2
+ %res = fadd float %sel.1, %sel.2
+ ret float %res
+}
|
c88ba46
to
6944648
Compare
if (!match(FAdd, m_FAdd(m_Specific(X), m_Specific(C)))) | ||
return nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just match the m_FAdd up originally, instead of matching the temporary m_Instruction above. Same with the fcmp check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your suggestions, but I didn't do so for two reasons:
- I believe it will make the original condition a little crowded.
- Getting FAdd "fast-math-flags" from both conditions will become complex.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We really ought to add variants of the pattern matchers that extract the flags
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually have a pattern matcher implementation somewhere that extracts the flags, the problem I ran into trying to use it is that our helpers for setting fast-math flags on new instructions only support an Instruction source, not a FastMathFlags
variable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, agree. I think these types of pattern matchers will make handling fast-math flags a little easier.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have merged the FCmp
into the original condition and kept the FAdd
as separate because of the flags.
// loop. | ||
if (match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_OneUse(m_Instruction(FAdd)), | ||
m_ConstantFP(C))) || | ||
match(&SI, m_Select(m_OneUse(m_Value(Cmp)), m_ConstantFP(C), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The conditions are a bit off. You need to know the constant isn't a nan (it doesn't even really need to be a constant, you just need to know it's not nan, which you can use isKnownNeverNaN for).
Broken: https://alive2.llvm.org/ce/z/65RLiq
Fixed: https://alive2.llvm.org/ce/z/nPXs8M
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is already a transformation for the Value type.
Doing this transformation only when the Select instruction has NaN and NSZ flags. I believe in that case there is no need to check constant for NaN.
FMF |= SI.getFastMathFlags(); | ||
|
||
Value *NewSelect = Builder.CreateSelect( | ||
Cmp, X, ConstantFP::getZero(C->getType()), SI.getName() + ".new", &SI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The identity value for fadd is -0, not +0. This transform as written is incorrect for -0 (but I believe is correct if you just emit -0 here)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We will need to emit the fcmp
instruction with -0
as well otherwise two things will happen:
- The IC transformation will not break at the condition and cause an infinite loop.
- The
fcmp + sel
combination will not lower intofmax/fmin
. https://godbolt.org/z/7r3Mnzq41
I believe it would be safe, but I wanted to verify if there's anything I might be overlooking.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not useful to emit fcmp with a -0. Fcmp with 0 or -0 are exactly the same operation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would emit the -0, and only emit the 0 with an NSZ flag
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not useful to emit fcmp with a -0. Fcmp with 0 or -0 are exactly the same operation
I think we need to emit fcmp with -0 to break the infinite loop as mentioned above.
6944648
to
603b736
Compare
Gentle ping for review! |
Can someone please take a look at this PR? Thanks! |
|
||
Value *NewSelect = Builder.CreateSelect(SI.getCondition(), X, Z, | ||
SI.getName() + ".new", &SI); | ||
cast<Instruction>(NewSelect)->setFastMathFlags(SI.getFastMathFlags()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is redundant with using &SI as the FMF source in the CreateSelect above. You only need the separate flag set if you want to combine with flags from some other operations
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Value* CreateSelect (Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
I believe no operand in CreateSelect
copies the FMF from the source instruction. That's why we need setFastMathFlags
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This API is a mess. Usually there's an instruction to take flags from, but CreateSelect takes metadata, I guess. It would be better if all of these had an explicit FMF parameter
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree, it's inconsistent. 😞
603b736
to
d1d022c
Compare
if (match(&SI, m_Select(m_OneUse(m_FCmp(Pred, m_Value(X), m_Value(Z))), | ||
m_OneUse(m_Instruction(FAdd)), m_Constant(C))) || | ||
match(&SI, m_Select(m_OneUse(m_FCmp(Pred, m_Value(X), m_Value(Z))), | ||
m_Constant(C), m_OneUse(m_Instruction(FAdd))))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we should have am m_c_Select that handles this commuted case and returns the swapped predicate
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this would make such cases much cleaner. Should we extend the pattern match as part of this change, or should we do it in a separate PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Separate, optional
|
||
; fcmp OGT + fadd + sel => fcmp OGT + sel => fmaxnum | ||
|
||
define float @test_fcmp_ogt_fadd_select_constant(float %in) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This case fails alive: https://alive2.llvm.org/ce/z/_h-8Lw
You can't preserve the nnan unless it was also present on the fadd and fcmp
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change does the below transformation, which seems to be safe https://alive2.llvm.org/ce/z/3XQb24.
define float @src_test_fcmp_ogt_fadd_select_constant(float %in) {
%cmp1 = fcmp ogt float %in, 0.000000e+00
%add = fadd float %in, 1.000000e+00
%sel = select nnan nsz i1 %cmp1, float %add, float 1.000000e+00
ret float %sel
}
=>
define float @tgt_test_fcmp_ogt_fadd_select_constant(float %in) {
%cmp1 = fcmp ogt float %in, 0.000000e+00
%sel = select nnan nsz i1 %cmp1, float %in, float 0.000000e+00
%add = fadd float %sel, 1.000000e+00
ret float %add
}
The transformation from tgt_test_fcmp_ogt_fadd_select_constant
to llvm.maxnum.f32
is an existing one, as shown here https://godbolt.org/z/6n4Tfrx9e. If this transformation is incorrect, we should probably address it in a separate PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this needs to be fixed. The flag handling for just this can be more aggressive
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm I've been working on making flags more aggressive for the above transformation. I've a question regarding the arguments of Select and FCmp instruction. Since the Select instruction already has the nnan
flag, wouldn't that imply that the arguments of FCmp also have this property?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It depends. If the values are in the same block, and the values in the fcmp are a subset of the values in the use instruction
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I'm not sure if I understand this fully. Isn't for this transformation the arguments of fcmp
and select
instruction should be the same? If possible could you please provide an example where this is a problem?
Builder.CreateSelect(SI.getCondition(), X, Z, SI.getName(), &SI); | ||
cast<Instruction>(NewSelect)->setFastMathFlags(SI.getFastMathFlags()); | ||
|
||
return Builder.CreateFAddFMF(NewSelect, C, FAdd, FAdd->getName()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you can just pass through the flags from the original fadd. I believe you need to and with the fcmp's flags
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, playing with the above alive link, I think this wants the union of the value flags, and the intersection of the rewrite flags. We ought to have a helper directly in FMF for this case, since this is a common pattern.
So or of nsz / nnan/ ninf, and of reassoc, afn, contract
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I didn’t quite understand why we need to consider fcmp flags. Could you please elaborate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The fcmp flags are relevant for the already broken select (fcmp) -.> minnum case.
But separately from that, you still need to correctly merge the select and fadd's flags since you reassociate them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, but without this, I cannot create a failing test case. Do you have any broken scenarios in mind?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The broken scenarios would depend on the usage context. The current guidance is the rewrite flags should be anded in transforms like this: https://llvm.org/docs/LangRef.html#rewrite-based-flags
You only need to demonstrate the rewrite flag intersection, not a behavior change as a result of them
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed rewrite-based flags.
d1d022c
to
a86a72f
Compare
@rajatbajpai Can you add alive2 links to the PR description? |
0f8eb7b
to
301cc8c
Compare
Value *NewSelect = Builder.CreateSelect(SI.getCondition(), X, Z, "", &SI); | ||
NewSelect->takeName(&SI); | ||
|
||
Value *NewFAdd = Builder.CreateFAdd(NewSelect, C); | ||
NewFAdd->takeName(FAdd); | ||
|
||
// Propagate rewrite-based flags | ||
auto SelectFMF = SI.getFastMathFlags(); | ||
auto FAddFMF = FAdd->getFastMathFlags(); | ||
FastMathFlags CommonFMF, NewFAddFMF, NewSelectFMF; | ||
|
||
CommonFMF.setAllowReassoc(SelectFMF.allowReassoc() && | ||
FAddFMF.allowReassoc()); | ||
CommonFMF.setAllowReciprocal(SelectFMF.allowReciprocal() && | ||
FAddFMF.allowReciprocal()); | ||
CommonFMF.setAllowContract(SelectFMF.allowContract() && | ||
FAddFMF.allowContract()); | ||
CommonFMF.setApproxFunc(SelectFMF.approxFunc() && FAddFMF.approxFunc()); | ||
NewSelectFMF = NewFAddFMF = CommonFMF; | ||
|
||
// Propagate FastMath flags | ||
NewFAddFMF.setNoNaNs(FAddFMF.noNaNs()); | ||
NewFAddFMF.setNoInfs(FAddFMF.noInfs()); | ||
NewFAddFMF.setNoSignedZeros(FAddFMF.noSignedZeros()); | ||
cast<Instruction>(NewFAdd)->setFastMathFlags(NewFAddFMF); | ||
|
||
NewSelectFMF.setNoNaNs(SelectFMF.noNaNs()); | ||
NewSelectFMF.setNoInfs(SelectFMF.noInfs()); | ||
NewSelectFMF.setNoSignedZeros(SelectFMF.noSignedZeros()); | ||
cast<Instruction>(NewSelect)->setFastMathFlags(NewSelectFMF); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This flag management is too verbose, and this is not an uncommon situation.
I think it is time to introduce (or replace) the IRBuilder Create*FMF functions with overloads that directly take a FastMathFlags parameter. There should also be one for select, which there doesn't appear to be one already.
More importantly, we need new helper functions for merging fast math flags. Most of the verbosity is from intersecting the rewrite flags. We should have some intersectRewrite and unionValue flag helpers directly in FastMathFlags.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Create*FMF functions takes an instruction from which to copy FastMathFlags.
I'll add intersectRewite
and unionValue
helpers in FastMathFlags.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I know. I am saying the Create*FMF API is bad. There should be new overloads that have an explicit FMF parameter. The take from instruction API is also questionable, we should consider removing it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Understood. Thanks!
NewFAddFMF.setNoNaNs(FAddFMF.noNaNs()); | ||
NewFAddFMF.setNoInfs(FAddFMF.noInfs()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the value flags can be unioned between these (but should double check this, it might depend on the constants being known not-nan)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, will check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't nnan guarantee be suffice for union?
if (!SIFOp || !SIFOp->hasNoSignedZeros() || !SIFOp->hasNoNaNs()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check alive2 for nnan on each individual instruction
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the constant is nan and the select instruction doesn't have a nnan flag, then we can't do this. However, this transformation triggers only when the Select instruction has a nnan flag. https://alive2.llvm.org/ce/z/CoRFC3
Transform `fcmp + fadd + sel` into `fcmp + sel + fadd` which enables the possibility of lowering `fcmp + sel` into `fmax/fmin`.
301cc8c
to
77fc700
Compare
ping @arsenm |
Gentle ping for review. |
// Propagate FastMath flags | ||
FastMathFlags SelectFMF = SI.getFastMathFlags(); | ||
FastMathFlags FAddFMF = FAdd->getFastMathFlags(); | ||
FastMathFlags NewFMF = FastMathFlags::intersectRewrite(SelectFMF, FAddFMF) | | ||
FastMathFlags::unionValue(SelectFMF, FAddFMF); | ||
cast<Instruction>(NewFAdd)->setFastMathFlags(NewFMF); | ||
cast<Instruction>(NewSelect)->setFastMathFlags(NewFMF); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Next API cleanup should move this into the original Create* above
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/8207 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/11364 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/180/builds/8205 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/12/builds/9364 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/10640 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/9272 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/8321 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/8331 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/8430 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/8259 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/7711 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/6091 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/108/builds/5827 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/14333 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/11963 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/12443 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/6282 Here is the relevant piece of the build log for the reference
|
…lvm#106492) Transform `fcmp + fadd + sel` into `fcmp + sel + fadd` which enables the possibility of transforming `fcmp + sel` into `maxnum/minnum` intrinsics. Alive2 results: https://alive2.llvm.org/ce/z/2cmimW https://alive2.llvm.org/ce/z/Qh9ZJt https://alive2.llvm.org/ce/z/vtLj3R
This patch is incorrect w.r.t. to flags. See Alive2's report: define float @test_fcmp_ogt_fadd_select_constant(float %in) {
%cmp1 = fcmp ogt float %in, 0.000000
%add = fadd float %in, 1.000000
%sel = select nnan nsz i1 %cmp1, float %add, float 1.000000
ret float %sel
}
=>
define float @test_fcmp_ogt_fadd_select_constant(float %in) {
%sel = fmax nnan nsz float %in, 0.000000
%add = fadd nnan nsz float %sel, 1.000000
ret float %add
}
Transformation doesn't verify! (unsound)
ERROR: Target is more poisonous than source
Example:
float %in = #x7f800002 (SNaN)
Source:
i1 %cmp1 = #x0 (0)
float %add = #x7f800002 (SNaN)
float %sel = #x3f800000 (1)
Target:
float %sel = poison
float %add = poison
Source value: #x3f800000 (1)
Target value: poison |
@nunoplopes Thanks for reporting the issue, we are already aware of this issue. Please see the earlier communication in this thread. #106492 (comment) |
define float @test_fcmp_ogt_fadd_select_constant_swapped(float %in) { | ||
; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_constant_swapped( | ||
; CHECK-SAME: float [[IN:%.*]]) { | ||
; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) | ||
; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 | ||
; CHECK-NEXT: ret float [[ADD_NEW]] | ||
; | ||
%cmp1 = fcmp ogt float %in, 0.000000e+00 | ||
%add = fadd float %in, 1.000000e+00 | ||
%sel = select nnan nsz i1 %cmp1, float 1.000000e+00, float %add | ||
ret float %sel | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is trivially incorrect.
define float @test_fcmp_oge_fadd_select_constant_swapped(float %in) {
#0:
%cmp1 = fcmp oge float %in, 0.000000
%add = fadd float %in, 1.000000
%sel = select nnan nsz i1 %cmp1, float 1.000000, float %add
ret float %sel
}
=>
define float @test_fcmp_oge_fadd_select_constant_swapped(float %in) {
#0:
%sel = fmax nsz float %in, 0.000000
%add = fadd nnan nsz float %sel, 1.000000
ret float %add
}
Transformation doesn't verify! (unsound)
ERROR: Value mismatch
Example:
float %in = #x3e800001 (0.250000029802?)
Source:
i1 %cmp1 = #x1 (1)
float %add = #x3fa00000 (1.25)
float %sel = #x3f800000 (1)
Target:
float %sel = #x3e800001 (0.250000029802?)
float %add = #x3fa00000 (1.25)
Source value: #x3f800000 (1)
Target value: #x3fa00000 (1.25)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, it should be llvm.minnum.f32
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be fixed with this: #119419
Transform
fcmp + fadd + sel
intofcmp + sel + fadd
which enables the possibility of transformingfcmp + sel
intomaxnum/minnum
intrinsics.Alive2 results:
https://alive2.llvm.org/ce/z/2cmimW
https://alive2.llvm.org/ce/z/Qh9ZJt
https://alive2.llvm.org/ce/z/vtLj3R