[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes #100316

dtcxzyw · 2024-07-24T07:35:25Z

See the following case:

define i16 @pr100298() {
entry:
  br label %for.inc

for.inc:
  %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ]
  %add = add nsw i32 %indvar, 9
  %mask = and i32 %add, 65535
  %cmp1 = icmp ugt i32 %mask, 5
  br i1 %cmp1, label %for.inc, label %for.end

for.end:
  %conv = trunc i32 %add to i16
  %cmp2 = icmp ugt i32 %mask, 3
  %shl = shl nuw i16 %conv, 14
  %res = select i1 %cmp2, i16 %conv, i16 %shl
  ret i16 %res
}

When computing knownbits of %shl with %cmp2=false, we cannot use this condition in the analysis of %mask (%for.inc -> %for.inc).

Fixes #100298.

llvmbot · 2024-07-24T07:35:54Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

See the following case:

define i16 @<!-- -->pr100298() {
entry:
  br label %for.inc

for.inc:
  %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ]
  %add = add nsw i32 %indvar, 9
  %mask = and i32 %add, 65535
  %cmp1 = icmp ugt i32 %mask, 5
  br i1 %cmp1, label %for.inc, label %for.end

for.end:
  %conv = trunc i32 %add to i16
  %cmp2 = icmp ugt i32 %mask, 3
  %shl = shl nuw i16 %conv, 14
  %res = select i1 %cmp2, i16 %conv, i16 %shl
  ret i16 %res
}

When computing knownbits of %shl with %cmp2=false, we cannot use this condition in the analysis of %mask (%for.inc -> %for.inc).

Fixes #100298.

Full diff: https://github.com/llvm/llvm-project/pull/100316.diff

3 Files Affected:

(modified) llvm/include/llvm/Analysis/SimplifyQuery.h (+6)
(modified) llvm/lib/Analysis/ValueTracking.cpp (+11-11)
(added) llvm/test/Transforms/InstCombine/pr100298.ll (+39)

diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index a560744f01222..e8f43c8c2e91f 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -130,6 +130,12 @@ struct SimplifyQuery {
     Copy.CC = &CC;
     return Copy;
   }
+
+  SimplifyQuery getWithoutCondContext() const {
+    SimplifyQuery Copy(*this);
+    Copy.CC = nullptr;
+    return Copy;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 40fe1ffe13f1b..4b77c0046cc70 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1435,7 +1435,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // inferred hold at original context instruction.  TODO: It may be
         // correct to use the original context.  IF warranted, explore and
         // add sufficient tests to cover.
-        SimplifyQuery RecQ = Q;
+        SimplifyQuery RecQ = Q.getWithoutCondContext();
         RecQ.CxtI = P;
         computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
         switch (Opcode) {
@@ -1468,7 +1468,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // phi. This is important because that is where the value is actually
         // "evaluated" even though it is used later somewhere else. (see also
         // D69571).
-        SimplifyQuery RecQ = Q;
+        SimplifyQuery RecQ = Q.getWithoutCondContext();
 
         unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
         Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
@@ -1546,7 +1546,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // phi. This is important because that is where the value is actually
         // "evaluated" even though it is used later somewhere else. (see also
         // D69571).
-        SimplifyQuery RecQ = Q;
+        SimplifyQuery RecQ = Q.getWithoutCondContext();
         RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
 
         Known2 = KnownBits(BitWidth);
@@ -2329,7 +2329,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
     // it is an induction variable where in each step its value is a power of
     // two.
     auto *PN = cast<PHINode>(I);
-    SimplifyQuery RecQ = Q;
+    SimplifyQuery RecQ = Q.getWithoutCondContext();
 
     // Check if it is an induction variable and always power of two.
     if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
@@ -2943,7 +2943,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
       return true;
 
     // Check if all incoming values are non-zero using recursion.
-    SimplifyQuery RecQ = Q;
+    SimplifyQuery RecQ = Q.getWithoutCondContext();
     unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
     return llvm::all_of(PN->operands(), [&](const Use &U) {
       if (U.get() == PN)
@@ -3509,7 +3509,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
     if (UsedFullRecursion)
       return false;
 
-    SimplifyQuery RecQ = Q;
+    SimplifyQuery RecQ = Q.getWithoutCondContext();
     RecQ.CxtI = IncomBB->getTerminator();
     if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
       return false;
@@ -4001,7 +4001,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
 
       // Take the minimum of all incoming values.  This can't infinitely loop
       // because of our depth threshold.
-      SimplifyQuery RecQ = Q;
+      SimplifyQuery RecQ = Q.getWithoutCondContext();
       Tmp = TyBits;
       for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
         if (Tmp == 1) return Tmp;
@@ -5909,10 +5909,10 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
         // Recurse, but cap the recursion to two levels, because we don't want
         // to waste time spinning around in loops. We need at least depth 2 to
         // detect known sign bits.
-        computeKnownFPClass(
-            IncValue, DemandedElts, InterestedClasses, KnownSrc,
-            PhiRecursionLimit,
-            Q.getWithInstruction(P->getIncomingBlock(U)->getTerminator()));
+        computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
+                            PhiRecursionLimit,
+                            Q.getWithoutCondContext().getWithInstruction(
+                                P->getIncomingBlock(U)->getTerminator()));
 
         if (First) {
           Known = KnownSrc;
diff --git a/llvm/test/Transforms/InstCombine/pr100298.ll b/llvm/test/Transforms/InstCombine/pr100298.ll
new file mode 100644
index 0000000000000..6cf2a71bb916e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr100298.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Make sure that the result of computeKnownBits for %indvar is correct.
+
+define i16 @pr100298() {
+; CHECK-LABEL: define i16 @pr100298() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_INC:.*]]
+; CHECK:       [[FOR_INC]]:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ -15, %[[ENTRY]] ], [ [[MASK:%.*]], %[[FOR_INC]] ]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INDVAR]], 9
+; CHECK-NEXT:    [[MASK]] = and i32 [[ADD]], 65535
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[MASK]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label %[[FOR_INC]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD]] to i16
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[MASK]], 3
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i16 [[CONV]], 14
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP2]], i16 [[CONV]], i16 [[SHL]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  br label %for.inc
+
+for.inc:
+  %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ]
+  %add = add nsw i32 %indvar, 9
+  %mask = and i32 %add, 65535
+  %cmp1 = icmp ugt i32 %mask, 5
+  br i1 %cmp1, label %for.inc, label %for.end
+
+for.end:
+  %conv = trunc i32 %add to i16
+  %cmp2 = icmp ugt i32 %mask, 3
+  %shl = shl nuw i16 %conv, 14
+  %res = select i1 %cmp2, i16 %conv, i16 %shl
+  ret i16 %res
+}

goldsteinn · 2024-07-24T08:07:37Z

Okay, took me a second, do I understand correctly that the issue is that since %mask is in-out its only the output that has the %cmp2=false context?

If thats correct, I think this works to fix it, although I wonder if we can do this a bit more precisely? I don't think this could be an issue outside of the context of a loop and it would be nice be able to keep this context for say analyzing the result of a switch/diamond branch.

PR Link: llvm/llvm-project#100316

dtcxzyw · 2024-07-24T09:37:00Z

although I wonder if we can do this a bit more precisely?

This fix doesn't affect real-world code. I don't see the value of increasing the complexity.

nikic

LGTM, should have seen this coming...

dtcxzyw · 2024-07-24T12:07:53Z

/cherry-pick 59eae91

…es (llvm#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes llvm#100298. (cherry picked from commit 59eae91)

llvmbot · 2024-07-24T12:12:51Z

/pull-request #100356

…es (llvm#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes llvm#100298. (cherry picked from commit 59eae91)

…es (#100316) Summary: See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes #100298. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250567

…es (llvm#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes llvm#100298.

dtcxzyw added 2 commits July 24, 2024 15:05

[ValueTracking] Add pre-commit tests. NFC.

fa51e99

[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes

f840c91

dtcxzyw requested a review from goldsteinn July 24, 2024 07:35

dtcxzyw requested a review from nikic as a code owner July 24, 2024 07:35

llvmbot added llvm:analysis llvm:transforms labels Jul 24, 2024

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jul 24, 2024

pre-commit: test PR100316

5cfd759

PR Link: llvm/llvm-project#100316

dtcxzyw mentioned this pull request Jul 24, 2024

pre-commit: test PR100316 dtcxzyw/llvm-opt-benchmark#1050

Closed

nikic approved these changes Jul 24, 2024

View reviewed changes

dtcxzyw merged commit 59eae91 into llvm:main Jul 24, 2024
7 of 10 checks passed

dtcxzyw deleted the fix-pr100298 branch July 24, 2024 12:06

dtcxzyw added this to the LLVM 19.X Release milestone Jul 24, 2024

philnik777 mentioned this pull request Aug 2, 2024

[Clang] Add a release note deprecating __is_nullptr #101638

Merged

mgabka mentioned this pull request Aug 22, 2024

Add release note about ABI mgabka/llvm-project#6

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes #100316

[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes #100316

dtcxzyw commented Jul 24, 2024

llvmbot commented Jul 24, 2024 •

edited

Loading

goldsteinn commented Jul 24, 2024

dtcxzyw commented Jul 24, 2024

nikic left a comment

dtcxzyw commented Jul 24, 2024

llvmbot commented Jul 24, 2024

[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes #100316

[ValueTracking] Don't use CondContext in dataflow analysis of phi nodes #100316

Conversation

dtcxzyw commented Jul 24, 2024

llvmbot commented Jul 24, 2024 • edited Loading

goldsteinn commented Jul 24, 2024

dtcxzyw commented Jul 24, 2024

nikic left a comment

Choose a reason for hiding this comment

dtcxzyw commented Jul 24, 2024

llvmbot commented Jul 24, 2024

llvmbot commented Jul 24, 2024 •

edited

Loading