-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Canonicalize more saturated-add variants #100008
Conversation
@llvm/pr-subscribers-llvm-transforms Author: AtariDreams (AtariDreams). Changes: [InstCombine] Enable saturated add canonicalization in more variants. To fix this, let's move the folds to after the canonicalization of -1 to TrueVal. Alive2 Proof: Full diff: https://github.com/llvm/llvm-project/pull/100008.diff — 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aaf4ece3249a2..dbe9d39a89dcb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -977,14 +977,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
Value *Cmp1 = Cmp->getOperand(1);
ICmpInst::Predicate Pred = Cmp->getPredicate();
Value *X;
- const APInt *C, *CmpC;
- if (Pred == ICmpInst::ICMP_ULT &&
- match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
- match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
- // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
- return Builder.CreateBinaryIntrinsic(
- Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
- }
+ const APInt *C;
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
// There are 8 commuted variants.
@@ -996,6 +989,30 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
if (!match(TVal, m_AllOnes()))
return nullptr;
+ if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&
+ match(FVal, m_Add(m_Specific(Cmp0), m_APInt(C))) &&
+ match(Cmp1, m_SpecificInt(~*C))) {
+ // (X u> ~C) ? -1 : (X + C) --> uadd.sat(X, C)
+ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
+ ConstantInt::get(Cmp0->getType(), *C));
+ }
+
+ if (Pred == ICmpInst::ICMP_UGE &&
+ match(FVal, m_Add(m_Specific(Cmp0), m_APInt(C))) &&
+ match(Cmp1, m_SpecificInt(-*C))) {
+ // (X u> -C) ? -1 : (X + C) --> uadd.sat(X, C)
+ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
+ ConstantInt::get(Cmp0->getType(), *C));
+ }
+
+ if (Pred == ICmpInst::ICMP_UGT &&
+ match(FVal, m_Add(m_Specific(Cmp0), m_APInt(C))) &&
+ match(Cmp1, m_SpecificInt(~*C - 1))) {
+ // (X u> ~C - 1) ? -1 : (X + C) --> uadd.sat(X, C)
+ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
+ ConstantInt::get(Cmp0->getType(), *C));
+ }
+
// Canonicalize predicate to less-than or less-or-equal-than.
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
std::swap(Cmp0, Cmp1);
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index bf1568f1cd8c0..0751efa16e402 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1395,6 +1395,215 @@ define i32 @uadd_sat(i32 %x, i32 %y) {
%r = select i1 %c, i32 -1, i32 %a
ret i32 %r
}
+
+define i32 @uadd_sat_flipped(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp uge i32 %x, -10
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped2(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped2(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ugt i32 %x, -10
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped3(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped3(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ugt i32 %x, -11
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped4(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped4(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp uge i32 %x, -9
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped5(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped5(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ult i32 %x, -9
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped6(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped6(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ule i32 %x, -11
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped7(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped7(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ule i32 %x, -10
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped8(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped8(
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ult i32 %x, -10
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -13
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp uge i32 %x, -12
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds2(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -12
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ugt i32 %x, -12
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds3(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds3(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -12
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ugt i32 %x, -12
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds4(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -9
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp uge i32 %x, -8
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds5(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -8
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ult i32 %x, -8
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds6(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds6(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -11
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ule i32 %x, -12
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds7(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -11
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ule i32 %x, -12
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds8(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds8(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -12
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ult i32 %x, -12
+ %add = add i32 %x, 9
+ %cond = select i1 %cmp, i32 %add, i32 -1
+ ret i32 %cond
+}
+
define i32 @uadd_sat_nonstrict(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_nonstrict(
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
@@ -1736,9 +1945,7 @@ define i32 @uadd_sat_not_commute_select_uge_commute_add(i32 %x, i32 %y) {
define i32 @uadd_sat_constant(i32 %x) {
; CHECK-LABEL: @uadd_sat_constant(
-; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 42
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], -43
-; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
; CHECK-NEXT: ret i32 [[R]]
;
%a = add i32 %x, 42
@@ -1804,9 +2011,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) {
define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
; CHECK-LABEL: @uadd_sat_constant_vec(
-; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[C:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
-; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
+; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
cd37fd0
to
0707bce
Compare
Why did you close #97973? |
Botched rebase that I didn't feel like manually undoing. I just cherry-picked and started again. Never doing --update-refs again |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please replace tests using something like uge C with ugt C-1 so the input is canonical. And then remove all the tests that become duplicates as a result.
Your alive2 proofs also include some preconditions -- some cases check that C!=0. But it does not look like your implementation contains those checks?
c96cd04
to
a82b20e
Compare
It does now! |
This PR was pretty close to an acceptable state the last time I looked at it. Now you have once again added extra changes that nobody asked for, and moved it away from acceptance again. |
The change to AllowPoison is warranted. See the tests and Alive2. |
349be38
to
48ae486
Compare
I fully exhausted all possible canonicalizations, well, minus actual variables but that will be for a follow-up PR, which means there is nothing else to do. I hope the test coverage and proofs are adequate. |
  ret i32 %cond
}
define i32 @uadd_sat_negative_one_poison(i32 %x) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if the scalar poison tests are interesting. The icmp with poison operand will folded to poison before the select is visited. The add with poison operand will also be folded to poison before the select is visited.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if the scalar poison tests are interesting. The icmp with poison operand will folded to poison before the select is visited. The add with poison operand will also be folded to poison before the select is visited.
Addressed!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't look like this was done. There are still 9 tests with scalar poison.
1345002
to
c255b8f
Compare
@topperc Is this PR ready to merge? |
Please do not ping a patch more than once per week per the coding standards. https://llvm.org/docs/CodeReview.html#lgtm-how-a-patch-is-accepted "Note that, if a reviewer has requested a particular community member to review, and after a week that community member has yet to respond, feel free to ping the patch (which literally means submitting a comment on the patch with the word, “Ping.”), or alternatively, ask the original reviewer for further suggestions." |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Understood! |
; CHECK-NEXT: ret i32 -1
;
  %cmp = icmp eq i32 %x, -1
  %add = add i32 %x, poison
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still testing scalar poison
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still testing scalar poison
Addressed!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a. To fix this, let's move the folds to after the canonicalization of -1 to TrueVal. Let's allow splat vectors with poison elements to be recognized too! Finally, for completion, handle the one case that isn't caught by the above checks because it is canonicalized to eq: X == -1 ? -1 : X + 1 -> uadd.sat(X, 1) Alive2 Proof: https://alive2.llvm.org/ce/z/WEcgYH
Why do you force push patches after they have been approved? We have to re-review to make sure nothing changed. But force push makes that hard. |
I folded the third commit away. |
Apologies, this should be safe now. I won't touch this anymore. |
Can we please merge this now? I do not have commit permissions. |
LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a. To fix this, let's move the folds to after the canonicalization of -1 to TrueVal. Let's allow splat vectors with poison elements to be recognized too! Finally, for completion, handle the one case that isn't caught by the above checks because it is canonicalized to eq: X == -1 ? -1 : X + 1 -> uadd.sat(X, 1) Alive2 Proof: https://alive2.llvm.org/ce/z/WEcgYH
LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a. To fix this, let's move the folds to after the canonicalization of -1 to TrueVal. Let's allow splat vectors with poison elements to be recognized too! Finally, for completion, handle the one case that isn't caught by the above checks because it is canonicalized to eq: X == -1 ? -1 : X + 1 -> uadd.sat(X, 1) Alive2 Proof: https://alive2.llvm.org/ce/z/WEcgYH
LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a. To fix this, let's move the folds to after the canonicalization of -1 to TrueVal. Let's allow splat vectors with poison elements to be recognized too! Finally, for completion, handle the one case that isn't caught by the above checks because it is canonicalized to eq: X == -1 ? -1 : X + 1 -> uadd.sat(X, 1) Alive2 Proof: https://alive2.llvm.org/ce/z/WEcgYH
LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a. To fix this, let's move the folds to after the canonicalization of -1 to TrueVal. Let's allow splat vectors with poison elements to be recognized too! Finally, for completion, handle the one case that isn't caught by the above checks because it is canonicalized to eq: X == -1 ? -1 : X + 1 -> uadd.sat(X, 1) Alive2 Proof: https://alive2.llvm.org/ce/z/WEcgYH
LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a.
To fix this, let's move the folds to after the canonicalization of -1 to TrueVal.
Let's allow splat vectors with poison elements to be recognized too!
As a result, negative tests that didn't get folded due to having poison in the vector now fold as well!
Finally, for completion, handle the one case that isn't caught by the above checks because it is canonicalized to eq:
X == -1 ? -1 : X + 1 -> uadd.sat(X, 1)
Alive2 Proof:
https://alive2.llvm.org/ce/z/WEcgYH