From 0800952575b594963ba485d40555f14abd897d91 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 24 Jul 2024 20:06:36 +0800 Subject: [PATCH] [ValueTracking] Don't use CondContext in dataflow analysis of phi nodes (#100316) See the following case: ``` define i16 @pr100298() { entry: br label %for.inc for.inc: %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] %add = add nsw i32 %indvar, 9 %mask = and i32 %add, 65535 %cmp1 = icmp ugt i32 %mask, 5 br i1 %cmp1, label %for.inc, label %for.end for.end: %conv = trunc i32 %add to i16 %cmp2 = icmp ugt i32 %mask, 3 %shl = shl nuw i16 %conv, 14 %res = select i1 %cmp2, i16 %conv, i16 %shl ret i16 %res } ``` When computing knownbits of `%shl` with `%cmp2=false`, we cannot use this condition in the analysis of `%mask (%for.inc -> %for.inc)`. Fixes https://github.com/llvm/llvm-project/issues/100298. (cherry picked from commit 59eae919c938f890e9b9b4be8a3fa3cb1b11ed89) --- llvm/include/llvm/Analysis/SimplifyQuery.h | 6 +++ llvm/lib/Analysis/ValueTracking.cpp | 22 +++++------ llvm/test/Transforms/InstCombine/pr100298.ll | 39 ++++++++++++++++++++ 3 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/pr100298.ll diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h index a560744f012222..e8f43c8c2e91f8 100644 --- a/llvm/include/llvm/Analysis/SimplifyQuery.h +++ b/llvm/include/llvm/Analysis/SimplifyQuery.h @@ -130,6 +130,12 @@ struct SimplifyQuery { Copy.CC = &CC; return Copy; } + + SimplifyQuery getWithoutCondContext() const { + SimplifyQuery Copy(*this); + Copy.CC = nullptr; + return Copy; + } }; } // end namespace llvm diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 40fe1ffe13f1b6..4b77c0046cc70f 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1435,7 +1435,7 @@ static void computeKnownBitsFromOperator(const Operator *I, // inferred hold at original context instruction. TODO: It may be // correct to use the original context. IF warranted, explore and // add sufficient tests to cover. - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); RecQ.CxtI = P; computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ); switch (Opcode) { @@ -1468,7 +1468,7 @@ static void computeKnownBitsFromOperator(const Operator *I, // phi. This is important because that is where the value is actually // "evaluated" even though it is used later somewhere else. (see also // D69571). - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); unsigned OpNum = P->getOperand(0) == R ? 0 : 1; Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator(); @@ -1546,7 +1546,7 @@ static void computeKnownBitsFromOperator(const Operator *I, // phi. This is important because that is where the value is actually // "evaluated" even though it is used later somewhere else. (see also // D69571). - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); Known2 = KnownBits(BitWidth); @@ -2329,7 +2329,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, // it is an induction variable where in each step its value is a power of // two. auto *PN = cast(I); - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); // Check if it is an induction variable and always power of two. if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) @@ -2943,7 +2943,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, return true; // Check if all incoming values are non-zero using recursion. - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); return llvm::all_of(PN->operands(), [&](const Use &U) { if (U.get() == PN) @@ -3509,7 +3509,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2, if (UsedFullRecursion) return false; - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); RecQ.CxtI = IncomBB->getTerminator(); if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ)) return false; @@ -4001,7 +4001,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. - SimplifyQuery RecQ = Q; + SimplifyQuery RecQ = Q.getWithoutCondContext(); Tmp = TyBits; for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { if (Tmp == 1) return Tmp; @@ -5909,10 +5909,10 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, // Recurse, but cap the recursion to two levels, because we don't want // to waste time spinning around in loops. We need at least depth 2 to // detect known sign bits. - computeKnownFPClass( - IncValue, DemandedElts, InterestedClasses, KnownSrc, - PhiRecursionLimit, - Q.getWithInstruction(P->getIncomingBlock(U)->getTerminator())); + computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc, + PhiRecursionLimit, + Q.getWithoutCondContext().getWithInstruction( + P->getIncomingBlock(U)->getTerminator())); if (First) { Known = KnownSrc; diff --git a/llvm/test/Transforms/InstCombine/pr100298.ll b/llvm/test/Transforms/InstCombine/pr100298.ll new file mode 100644 index 00000000000000..6cf2a71bb916e5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr100298.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +; Make sure that the result of computeKnownBits for %indvar is correct. + +define i16 @pr100298() { +; CHECK-LABEL: define i16 @pr100298() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_INC:.*]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ -15, %[[ENTRY]] ], [ [[MASK:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[INDVAR]], 9 +; CHECK-NEXT: [[MASK]] = and i32 [[ADD]], 65535 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[MASK]], 5 +; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_INC]], label %[[FOR_END:.*]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[MASK]], 3 +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i16 [[CONV]], 14 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i16 [[CONV]], i16 [[SHL]] +; CHECK-NEXT: ret i16 [[RES]] +; +entry: + br label %for.inc + +for.inc: + %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ] + %add = add nsw i32 %indvar, 9 + %mask = and i32 %add, 65535 + %cmp1 = icmp ugt i32 %mask, 5 + br i1 %cmp1, label %for.inc, label %for.end + +for.end: + %conv = trunc i32 %add to i16 + %cmp2 = icmp ugt i32 %mask, 3 + %shl = shl nuw i16 %conv, 14 + %res = select i1 %cmp2, i16 %conv, i16 %shl + ret i16 %res +}