-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes #100316
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesSee the following case:
When computing knownbits of Fixes #100298. Full diff: https://github.com/llvm/llvm-project/pull/100316.diff 3 Files Affected:
diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index a560744f01222..e8f43c8c2e91f 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -130,6 +130,12 @@ struct SimplifyQuery {
Copy.CC = &CC;
return Copy;
}
+
+ SimplifyQuery getWithoutCondContext() const {
+ SimplifyQuery Copy(*this);
+ Copy.CC = nullptr;
+ return Copy;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 40fe1ffe13f1b..4b77c0046cc70 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1435,7 +1435,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// inferred hold at original context instruction. TODO: It may be
// correct to use the original context. IF warranted, explore and
// add sufficient tests to cover.
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
RecQ.CxtI = P;
computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
switch (Opcode) {
@@ -1468,7 +1468,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
// D69571).
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
@@ -1546,7 +1546,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
// D69571).
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
Known2 = KnownBits(BitWidth);
@@ -2329,7 +2329,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// it is an induction variable where in each step its value is a power of
// two.
auto *PN = cast<PHINode>(I);
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
// Check if it is an induction variable and always power of two.
if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
@@ -2943,7 +2943,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return true;
// Check if all incoming values are non-zero using recursion.
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
@@ -3509,7 +3509,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
if (UsedFullRecursion)
return false;
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
RecQ.CxtI = IncomBB->getTerminator();
if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
return false;
@@ -4001,7 +4001,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- SimplifyQuery RecQ = Q;
+ SimplifyQuery RecQ = Q.getWithoutCondContext();
Tmp = TyBits;
for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
@@ -5909,10 +5909,10 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// Recurse, but cap the recursion to two levels, because we don't want
// to waste time spinning around in loops. We need at least depth 2 to
// detect known sign bits.
- computeKnownFPClass(
- IncValue, DemandedElts, InterestedClasses, KnownSrc,
- PhiRecursionLimit,
- Q.getWithInstruction(P->getIncomingBlock(U)->getTerminator()));
+ computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
+ PhiRecursionLimit,
+ Q.getWithoutCondContext().getWithInstruction(
+ P->getIncomingBlock(U)->getTerminator()));
if (First) {
Known = KnownSrc;
diff --git a/llvm/test/Transforms/InstCombine/pr100298.ll b/llvm/test/Transforms/InstCombine/pr100298.ll
new file mode 100644
index 0000000000000..6cf2a71bb916e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr100298.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Make sure that the result of computeKnownBits for %indvar is correct.
+
+define i16 @pr100298() {
+; CHECK-LABEL: define i16 @pr100298() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_INC:.*]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ -15, %[[ENTRY]] ], [ [[MASK:%.*]], %[[FOR_INC]] ]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[INDVAR]], 9
+; CHECK-NEXT: [[MASK]] = and i32 [[ADD]], 65535
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[MASK]], 5
+; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_INC]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[MASK]], 3
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i16 [[CONV]], 14
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i16 [[CONV]], i16 [[SHL]]
+; CHECK-NEXT: ret i16 [[RES]]
+;
+entry:
+ br label %for.inc
+
+for.inc:
+ %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ]
+ %add = add nsw i32 %indvar, 9
+ %mask = and i32 %add, 65535
+ %cmp1 = icmp ugt i32 %mask, 5
+ br i1 %cmp1, label %for.inc, label %for.end
+
+for.end:
+ %conv = trunc i32 %add to i16
+ %cmp2 = icmp ugt i32 %mask, 3
+ %shl = shl nuw i16 %conv, 14
+ %res = select i1 %cmp2, i16 %conv, i16 %shl
+ ret i16 %res
+}
|
Okay, took me a second, do I understand correctly that the issue is that since If thats correct, I think this works to fix it, although I wonder if we can do this a bit more precisely? I don't think this could be an issue outside of the context of a loop and it would be nice be able to keep this context for say analyzing the result of a switch/diamond branch. |
This fix doesn't affect real-world code. I don't see the value of increasing the complexity. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, should have seen this coming...
/cherry-pick 59eae91 |
…es (llvm#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes llvm#100298. (cherry picked from commit 59eae91)
/pull-request #100356 |
…es (llvm#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes llvm#100298. (cherry picked from commit 59eae91)
…es (#100316) Summary: See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes #100298. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250567
…es (llvm#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes llvm#100298.
See the following case:
When computing knownbits of
%shl
with%cmp2=false
, we cannot use this condition in the analysis of%mask (%for.inc -> %for.inc)
.Fixes #100298.