From fa09a6fea81f2a4c524b9ed952f42aece5a7c4d3 Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Mon, 24 Feb 2020 15:36:17 -0500
Subject: [PATCH] Add patch for 31156

This imports the patch I put up in https://reviews.llvm.org/D75072 and
should fix #31156. We should probably hold off on merging this for a few
days while upstream review is ongoing. In the meantime, this branch should
be convenient to try. Make sure to remember to build LLVM from source,
not BB.
---
 deps/llvm.mk                                 |   1 +
 deps/patches/llvm-D75072-SCEV-add-type.patch | 415 +++++++++++++++++++
 2 files changed, 416 insertions(+)
 create mode 100644 deps/patches/llvm-D75072-SCEV-add-type.patch

diff --git a/deps/llvm.mk b/deps/llvm.mk
index 5b6f8160d03b5..8f14a92ad2a8c 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -414,6 +414,7 @@ $(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
 $(eval $(call LLVM_PATCH,llvm7-revert-D44485))
 $(eval $(call LLVM_PATCH,llvm-8.0-D66657-codegen-degenerate)) # remove for 10.0
 $(eval $(call LLVM_PATCH,llvm-8.0-D71495-vectorize-freduce)) # remove for 10.0
+$(eval $(call LLVM_PATCH,llvm-D75072-SCEV-add-type))
 endif # LLVM_VER 9.0

diff --git a/deps/patches/llvm-D75072-SCEV-add-type.patch b/deps/patches/llvm-D75072-SCEV-add-type.patch
new file mode 100644
index 0000000000000..9a9e801e970a4
--- /dev/null
+++ b/deps/patches/llvm-D75072-SCEV-add-type.patch
@@ -0,0 +1,415 @@
+commit a55a3ab4dc5c66c153b2988fc4fa46b39bfc92fc
+Author: Keno Fischer
+Date:   Mon Feb 24 14:18:22 2020 -0500
+
+    [SCEV] Record NI types in add exprs
+
+    Summary:
+    This fixes a case where loop-reduce introduces ptrtoint/inttoptr for
+    non-integral address space pointers. Over the past several years, we
+    have gradually improved the SCEVExpander to actually do something
+    sensible for non-integral pointer types. However, that obviously
+    relies on the expander knowing what the type of the SCEV expression is.
+    That is usually the case, but there is one important case where it's
+    not: The type of an add expression is just the type of the last operand,
+    so if the non-integral pointer is not the last operand, later uses of
+    that SCEV may not realize that the given add expression contains
+    non-integral pointers and may try to expand it as integers.
+
+    One interesting observation is that we do get away with this scheme in
+    shockingly many cases. The reason for this is that SCEV expressions
+    often have an `scUnknown` pointer base, which our sort order on the
+    operands of add expressions sorts behind basically everything else,
+    so it usually ends up as the last operand.
+
+    One situation where this fails is included as a test case. This test
+    case was bugpoint-reduced from the issue reported at
+    https://github.com/JuliaLang/julia/issues/31156. What happens here
+    is that the pointer base is an scAddRec from an outer loop, plus an
+    scUnknown integer offset. By our sort order, the scUnknown gets sorted
+    after the scAddRec pointer base, thus making an add expression of these
+    two operands have integer type. This then confuses the expander into
+    attempting to expand the whole thing as integers, which will obviously
+    fail when reaching the non-integral pointer.
+
+    I considered a few options to solve this, but here's what I ended up
+    settling on: The AddExpr class gains a new subclass that explicitly
+    stores the type of the expression. This subclass is used whenever one
+    of the operands is a non-integral pointer.
+    To reduce the impact for the
+    regular case (where the SCEV expression contains no non-integral
+    pointers), a bit flag is kept in each SCEV expression to indicate
+    whether it is of non-integral pointer type (this should give the same
+    answer as asking if getType() is non-integral, but performing that
+    query may involve a pointer chase and requires the DataLayout). For
+    add expressions that flag is also used to indicate whether we're using
+    the subclass or not. This is slightly inefficient, because it uses
+    the subclass even in the (not uncommon) case where the last operand
+    does actually accurately reflect the non-integral pointer type. However,
+    it didn't seem worth the extra flag bit and complexity to do this
+    micro-optimization.
+
+    I had hoped that we could additionally restrict mul exprs from
+    containing any non-integral pointers, and also require add exprs to
+    only have one operand containing such pointers (but not more), but this
+    turned out not to work. The reason for this is that SCEV wants to
+    form differences between pointers, which it represents as `A + B*-1`,
+    so we need to allow both multiplication by `-1` and addition with
+    multiple non-integral pointer arguments. I'm not super happy with
+    that situation, but I think it exposes a more general problem with
+    non-integral pointers in LLVM. We don't actually have a way to express
+    the difference between two non-integral pointers at the IR level.
+    In theory this is a problem for SCEV, because it means that we can't
+    materialize such SCEV expressions. However, in practice, these
+    expressions generally have the same base pointer, so SCEV will
+    appropriately simplify them to just the integer components.
+    Nevertheless it is a bit unsatisfying. Perhaps we could have an
+    intrinsic that takes the byte difference between two pointers to the
+    same allocated object (in the same sense as is used in getelementptr),
+    which should be a sensible operation even for non-integral pointers.
+    However, given the practical considerations above, that's a project
+    for another time. For now, simply allowing the existing pointer-diff
+    pattern for non-integral pointers seems to work ok.
+
+    Reviewers: sanjoy, reames, vtjnash, vchuravy
+
+    Subscribers: hiraditya, javed.absar, llvm-commits
+
+    Tags: #llvm
+
+    Differential Revision: https://reviews.llvm.org/D75072
+
+diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h
+index 0bd98ef37e7..317bdeac3f0 100644
+--- llvm/include/llvm/Analysis/ScalarEvolution.h
++++ llvm/include/llvm/Analysis/ScalarEvolution.h
+@@ -118,6 +118,19 @@ public:
+     NoWrapMask = (1 << 3) - 1
+   };
+ 
++  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
++  ///
++  /// When constructing SCEV expressions for LLVM expressions with non-integral
++  /// pointer types, some additional processing is required to ensure that we
++  /// don't introduce any illegal transformations. However, non-integral pointer
++  /// types are a very rarely used feature, so we want to make sure to only do
++  /// such processing if they are actually used. To ensure minimal performance
++  /// impact, we memoize that fact using these flags.
++  enum HasNonIntegralPointerFlag {
++    FlagNoNIPointers = 0,
++    FlagHasNIPointers = (1 << 3)
++  };
++
+   explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
+                 unsigned short ExpressionSize)
+       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
+@@ -154,6 +167,10 @@ public:
+     return ExpressionSize;
+   }
+ 
++  bool hasNonIntegralPointers() const {
++    return SubclassData & FlagHasNIPointers;
++  }
++
+   /// Print out the internal representation of this scalar to the specified
+   /// stream. This should really only be used for debugging purposes.
+   void print(raw_ostream &OS) const;
+@@ -747,7 +764,7 @@ public:
+                                      BasicBlock *ExitingBlock);
+ 
+   /// Return the number of times the backedge executes before the given exit
+-  /// would be taken; if not exactly computable, return SCEVCouldNotCompute. 
++  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
+   /// For a single exit loop, this value is equivelent to the result of
+   /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
+   /// before the backedge is executed (ExitCount + 1) times. Note that there
+diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+index d008af7b7e6..39ab35a8b8c 100644
+--- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
++++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+@@ -188,6 +188,13 @@ class Type;
+       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
+     }
+ 
++    void setHasNIPtr(bool HasNIPtr) {
++      if (HasNIPtr)
++        SubclassData |= FlagHasNIPointers;
++      else
++        SubclassData &= ~FlagHasNIPointers;
++    }
++
+     /// Methods for support type inquiry through isa, cast, and dyn_cast:
+     static bool classof(const SCEV *S) {
+       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+@@ -222,24 +229,54 @@ class Type;
+   class SCEVAddExpr : public SCEVCommutativeExpr {
+     friend class ScalarEvolution;
+ 
++  protected:
+     SCEVAddExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+         : SCEVCommutativeExpr(ID, scAddExpr, O, N) {}
+ 
+   public:
+-    Type *getType() const {
+-      // Use the type of the last operand, which is likely to be a pointer
+-      // type, if there is one. This doesn't usually matter, but it can help
+-      // reduce casts when the expressions are expanded.
+-      return getOperand(getNumOperands() - 1)->getType();
++    /// Returns the type of the add expression, by looking either at the last
++    /// operand or deferring to the SCEVAddNIExpr subclass for non-integral
++    /// pointers.
++    Type *getType() const;
++
++    /// Methods for support type inquiry through isa, cast, and dyn_cast:
++    static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; }
++  };
++
++  /// This node represents an addition of some number of SCEVs, one of which
++  /// is a non-integral pointer type, requiring us to know the type exactly for
++  /// correctness.
++  class SCEVAddNIExpr : public SCEVAddExpr {
++    friend class ScalarEvolution;
++    PointerType *NIType;
++
++    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
++                  PointerType *NIType)
++        : SCEVAddExpr(ID, O, N), NIType(NIType) {
++      SubclassData |= FlagHasNIPointers;
+     }
+ 
++  public:
++    Type *getType() const { return NIType; }
++
+     /// Methods for support type inquiry through isa, cast, and dyn_cast:
+     static bool classof(const SCEV *S) {
+-      return S->getSCEVType() == scAddExpr;
++      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
+     }
+   };
+ 
++  inline Type *SCEVAddExpr::getType() const {
++    // In general, use the type of the last operand, which is likely to be a
++    // pointer type, if there is one. This doesn't usually matter, but it can
++    // help reduce casts when the expressions are expanded. In the (unusual)
++    // case that we're working with non-integral pointers, we have a subclass
++    // that stores that type explicitly.
++    if (hasNonIntegralPointers())
++      return cast<SCEVAddNIExpr>(this)->getType();
++    return getOperand(getNumOperands() - 1)->getType();
++  }
++
+   /// This node represents multiplication of some number of SCEVs.
+   class SCEVMulExpr : public SCEVCommutativeExpr {
+     friend class ScalarEvolution;
+@@ -249,6 +286,18 @@ class Type;
+         : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
+ 
+   public:
++    Type *getType() const {
++      // In general, we can't form SCEVMulExprs with non-integral pointer types,
++      // but for the moment we need to allow a special case: Multiplying by
++      // -1 to be able to express the difference between two pointers. In order
++      // to maintain the invariant that SCEVs with the NI flag set should have
++      // a type corresponding to the contained NI ptr, we need to return the
++      // type of the pointer here.
++      if (hasNonIntegralPointers())
++        return getOperand(getNumOperands() - 1)->getType();
++      return SCEVCommutativeExpr::getType();
++    }
++
+     /// Methods for support type inquiry through isa, cast, and dyn_cast:
+     static bool classof(const SCEV *S) {
+       return S->getSCEVType() == scMulExpr;
+@@ -475,9 +524,12 @@ class Type;
+     /// instances owned by a ScalarEvolution.
+     SCEVUnknown *Next;
+ 
+-    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
+-                ScalarEvolution *se, SCEVUnknown *next) :
+-      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
++    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
++                SCEVUnknown *next, bool ValueIsNIPtr)
++        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
++      if (ValueIsNIPtr)
++        SubclassData |= FlagHasNIPointers;
++    }
+ 
+     // Implement CallbackVH.
+     void deleted() override;
+diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp
+index bc2cfd6fcc4..2f8eb665c5d 100644
+--- llvm/lib/Analysis/ScalarEvolution.cpp
++++ llvm/lib/Analysis/ScalarEvolution.cpp
+@@ -358,12 +358,13 @@ Type *SCEV::getType() const {
+   case scSignExtend:
+     return cast<SCEVCastExpr>(this)->getType();
+   case scAddRecExpr:
+-  case scMulExpr:
+   case scUMaxExpr:
+   case scSMaxExpr:
+   case scUMinExpr:
+   case scSMinExpr:
+     return cast<SCEVNAryExpr>(this)->getType();
++  case scMulExpr:
++    return cast<SCEVMulExpr>(this)->getType();
+   case scAddExpr:
+     return cast<SCEVAddExpr>(this)->getType();
+   case scUDivExpr:
+@@ -2441,8 +2442,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+   }
+ 
+   // Limit recursion calls depth.
+-  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
++  if (Depth > MaxArithDepth || hasHugeExpression(Ops)) {
+     return getOrCreateAddExpr(Ops, Flags);
++  }
+ 
+   // Okay, check to see if the same value occurs in the operand list more than
+   // once. If so, merge them together into an multiply expression. Since we
+@@ -2783,16 +2785,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
+                                     SCEV::NoWrapFlags Flags) {
+   FoldingSetNodeID ID;
+   ID.AddInteger(scAddExpr);
+-  for (const SCEV *Op : Ops)
+-    ID.AddPointer(Op);
++  bool HasNIPtr = false;
++  PointerType *NIPtrType = nullptr;
++  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
++    ID.AddPointer(Ops[i]);
++    if (Ops[i]->hasNonIntegralPointers()) {
++      HasNIPtr = true;
++      NIPtrType = cast<PointerType>(Ops[i]->getType());
++    }
++  }
+   void *IP = nullptr;
+   SCEVAddExpr *S =
+       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+   if (!S) {
+     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+-    S = new (SCEVAllocator)
+-        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
++    if (HasNIPtr)
++      S = new (SCEVAllocator)
++          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
++    else
++      S = new (SCEVAllocator)
++          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
+     UniqueSCEVs.InsertNode(S, IP);
+     addToLoopUseLists(S);
+   }
+@@ -2805,8 +2818,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
+                                        const Loop *L, SCEV::NoWrapFlags Flags) {
+   FoldingSetNodeID ID;
+   ID.AddInteger(scAddRecExpr);
+-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
++  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
++    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
+     ID.AddPointer(Ops[i]);
++  }
+   ID.AddPointer(L);
+   void *IP = nullptr;
+   SCEVAddRecExpr *S =
+@@ -2820,6 +2835,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
+     addToLoopUseLists(S);
+   }
+   S->setNoWrapFlags(Flags);
++  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
+   return S;
+ }
+ 
+@@ -2828,8 +2844,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
+                                     SCEV::NoWrapFlags Flags) {
+   FoldingSetNodeID ID;
+   ID.AddInteger(scMulExpr);
+-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
++  bool HasNIPtr = false;
++  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
++    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
+     ID.AddPointer(Ops[i]);
++  }
+   void *IP = nullptr;
+   SCEVMulExpr *S =
+       static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+@@ -2842,6 +2861,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
+     addToLoopUseLists(S);
+   }
+   S->setNoWrapFlags(Flags);
++  S->setHasNIPtr(HasNIPtr);
+   return S;
+ }
+ 
+@@ -3666,8 +3686,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
+     return ExistingSCEV;
+   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+-  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
++  SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr(
+       ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
++  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
++  // operands all need to be of the same type.
++  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
+ 
+   UniqueSCEVs.InsertNode(S, IP);
+   addToLoopUseLists(S);
+@@ -3744,8 +3767,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
+            "Stale SCEVUnknown in uniquing map!");
+     return S;
+   }
++  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
+   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+-                                            FirstUnknown);
++                                            FirstUnknown, ValueIsNIPtr);
+   FirstUnknown = cast<SCEVUnknown>(S);
+   UniqueSCEVs.InsertNode(S, IP);
+   return S;
+diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
+index 5648e3aa74a..6936521f3a6 100644
+--- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
++++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
+@@ -2,7 +2,7 @@
+ 
+ ; Address Space 10 is non-integral. The optimizer is not allowed to use
+ ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
+-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+ target triple = "x86_64-unknown-linux-gnu"
+ 
+ define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
+@@ -43,3 +43,36 @@ if38: ; preds = %L119
+ done:                                             ; preds = %if38
+   ret void
+ }
++
++; This is a bugpoint-reduced regression test - It doesn't make too much sense by itself,
++; but creates the correct SCEV expressions to reproduce the issue. See
++; https://github.com/JuliaLang/julia/issues/31156 for the original bug report.
++define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 {
++; CHECK-NOT: inttoptr
++; CHECK-NOT: ptrtoint
++; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
++top:
++  br label %L42.L46_crit_edge.us
++
++L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
++  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
++  %0 = sub i64 %value_phi11.us, %b
++  %1 = add i64 %0, %c
++  %spec.select = select i1 %g, i64 %d, i64 0
++  br label %L62.us.us
++
++L82.us.us.loopexit:                               ; preds = %L62.us.us
++  %2 = add i64 %e, %value_phi11.us
++  br label %L42.L46_crit_edge.us
++
++L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
++  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
++  %3 = add i64 %1, %value_phi21.us.us
++  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
++  %5 = load i8, i8 addrspace(13)* %4, align 1
++  %6 = add i64 %f, %value_phi21.us.us
++  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
++}
++
++!1 = distinct !{!1, !2}
++!2 = !{!"llvm.loop.isvectorized", i32 1}