From 2213a354b9d1cc0153ac20367e5397c66e660c32 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 20 Oct 2020 18:34:31 -0700 Subject: [PATCH 001/179] [Polly] Delete unused lambda capture after 7175cffb2133048018df74c1b49d1d4962ea18f2 --- polly/lib/Transform/ForwardOpTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polly/lib/Transform/ForwardOpTree.cpp b/polly/lib/Transform/ForwardOpTree.cpp index 56c5fefeab39a..141ac26b714d8 100644 --- a/polly/lib/Transform/ForwardOpTree.cpp +++ b/polly/lib/Transform/ForwardOpTree.cpp @@ -696,7 +696,7 @@ class ForwardOpTreeImpl : ZoneAlgorithm { } } - auto ExecAction = [this, TargetStmt, UseInst, DefStmt]() { + auto ExecAction = [this, TargetStmt, UseInst]() { // To ensure the right order, prepend this instruction before its // operands. This ensures that its operands are inserted before the // instruction using them. From 324a15ceade2dc5ac5b0f106e0127c5c47942614 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Wed, 21 Oct 2020 11:38:21 +0900 Subject: [PATCH 002/179] [AMDGPU][NFC] Fix missing size in comment --- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 6cae479a69b58..978361b1a271c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -41,7 +41,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { static std::array, 16> RegSplitParts; // Table representing sub reg of given width and offset. - // First index is subreg size: 32, 64, 96, 128, 160, 192, 256, 512. + // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512. // Second index is 32 different dword offsets. 
static std::array, 9> SubRegFromChannelTable; From c17ae2916ccf45a0c1717bd5f11598cc4fff342a Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Tue, 20 Oct 2020 19:35:17 -0700 Subject: [PATCH 003/179] Remove unnecessary header include which violates layering This was introduced in https://reviews.llvm.org/D89774, but I don't think it should be necessary. Reviewed By: TaWeiTu, aeubanks Differential Revision: https://reviews.llvm.org/D89843 --- llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index c4868a6a799fe..0b718ed6136ea 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -21,7 +21,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/InitializePasses.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" From 5e731625f3d641296d972c30da4018a9be78f3fe Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 20 Oct 2020 12:09:38 -0700 Subject: [PATCH 004/179] [NFC][MC] Use [MC]Register in MachineVerifier Differential Revision: https://reviews.llvm.org/D89815 --- llvm/lib/CodeGen/MachineVerifier.cpp | 117 ++++++++++++++------------- 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index cf48d292d9320..dc25146d7868c 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -102,10 +102,10 @@ namespace { bool isFunctionRegBankSelected; bool isFunctionSelected; - using RegVector = SmallVector; + using RegVector = SmallVector; using RegMaskVector = SmallVector; - using RegSet = DenseSet; - using RegMap = DenseMap; + using RegSet = DenseSet; + using RegMap = DenseMap; using BlockSet = SmallPtrSet; const MachineInstr 
*FirstNonPHI; @@ -120,10 +120,10 @@ namespace { SlotIndex lastIndex; // Add Reg and any sub-registers to RV - void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + void addRegWithSubRegs(RegVector &RV, Register Reg) { RV.push_back(Reg); - if (Register::isPhysicalRegister(Reg)) - for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + if (Reg.isPhysical()) + for (const MCPhysReg &SubReg : TRI->subregs(Reg.asMCReg())) RV.push_back(SubReg); } @@ -159,8 +159,8 @@ namespace { // Add register to vregsRequired if it belongs there. Return true if // anything changed. - bool addRequired(unsigned Reg) { - if (!Register::isVirtualRegister(Reg)) + bool addRequired(Register Reg) { + if (!Reg.isVirtual()) return false; if (regsLiveOut.count(Reg)) return false; @@ -170,7 +170,7 @@ namespace { // Same for a full set. bool addRequired(const RegSet &RS) { bool Changed = false; - for (unsigned Reg : RS) + for (Register Reg : RS) Changed |= addRequired(Reg); return Changed; } @@ -184,7 +184,7 @@ namespace { } // Live-out registers are either in regsLiveOut or vregsPassed. - bool isLiveOut(unsigned Reg) const { + bool isLiveOut(Register Reg) const { return regsLiveOut.count(Reg) || vregsPassed.count(Reg); } }; @@ -192,13 +192,13 @@ namespace { // Extra register info per MBB. 
DenseMap MBBInfoMap; - bool isReserved(unsigned Reg) { - return Reg < regsReserved.size() && regsReserved.test(Reg); + bool isReserved(Register Reg) { + return Reg.id() < regsReserved.size() && regsReserved.test(Reg.id()); } - bool isAllocatable(unsigned Reg) const { - return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && - !regsReserved.test(Reg); + bool isAllocatable(Register Reg) const { + return Reg.id() < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && + !regsReserved.test(Reg.id()); } // Analysis information if available @@ -226,7 +226,7 @@ namespace { LLT MOVRegType = LLT{}); void report_context(const LiveInterval &LI) const; - void report_context(const LiveRange &LR, unsigned VRegUnit, + void report_context(const LiveRange &LR, Register VRegUnit, LaneBitmask LaneMask) const; void report_context(const LiveRange::Segment &S) const; void report_context(const VNInfo &VNI) const; @@ -234,18 +234,19 @@ namespace { void report_context(MCPhysReg PhysReg) const; void report_context_liverange(const LiveRange &LR) const; void report_context_lanemask(LaneBitmask LaneMask) const; - void report_context_vreg(unsigned VReg) const; - void report_context_vreg_regunit(unsigned VRegOrUnit) const; + void report_context_vreg(Register VReg) const; + void report_context_vreg_regunit(Register VRegOrUnit) const; void verifyInlineAsm(const MachineInstr *MI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum, - SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit, + SlotIndex UseIdx, const LiveRange &LR, + Register VRegOrUnit, LaneBitmask LaneMask = LaneBitmask::getNone()); void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, - SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, - bool SubRangeCheck = false, + SlotIndex DefIdx, const LiveRange &LR, + Register VRegOrUnit, bool SubRangeCheck = false, LaneBitmask LaneMask = LaneBitmask::getNone()); void 
markReachable(const MachineBasicBlock *MBB); @@ -256,12 +257,12 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, + void verifyLiveRangeValue(const LiveRange &, const VNInfo *, Register, LaneBitmask); - void verifyLiveRangeSegment(const LiveRange&, - const LiveRange::const_iterator I, unsigned, + void verifyLiveRangeSegment(const LiveRange &, + const LiveRange::const_iterator I, Register, LaneBitmask); - void verifyLiveRange(const LiveRange&, unsigned, + void verifyLiveRange(const LiveRange &, Register, LaneBitmask LaneMask = LaneBitmask::getNone()); void verifyStackFrame(); @@ -508,7 +509,7 @@ void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit, +void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit, LaneBitmask LaneMask) const { report_context_liverange(LR); report_context_vreg_regunit(VRegUnit); @@ -532,11 +533,11 @@ void MachineVerifier::report_context(MCPhysReg PReg) const { errs() << "- p. register: " << printReg(PReg, TRI) << '\n'; } -void MachineVerifier::report_context_vreg(unsigned VReg) const { +void MachineVerifier::report_context_vreg(Register VReg) const { errs() << "- v. 
register: " << printReg(VReg, TRI) << '\n'; } -void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { +void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const { if (Register::isVirtualRegister(VRegOrUnit)) { report_context_vreg(VRegOrUnit); } else { @@ -1958,8 +1959,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, - unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit, - LaneBitmask LaneMask) { + unsigned MONum, SlotIndex UseIdx, + const LiveRange &LR, + Register VRegOrUnit, + LaneBitmask LaneMask) { LiveQueryResult LRQ = LR.Query(UseIdx); // Check if we have a segment at the use, note however that we only need one // live subregister range, the others may be dead. @@ -1980,8 +1983,11 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, } void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, - unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, - bool SubRangeCheck, LaneBitmask LaneMask) { + unsigned MONum, SlotIndex DefIdx, + const LiveRange &LR, + Register VRegOrUnit, + bool SubRangeCheck, + LaneBitmask LaneMask) { if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) { assert(VNI && "NULL valno is not allowed"); if (VNI->def != DefIdx) { @@ -2025,7 +2031,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); - const unsigned Reg = MO->getReg(); + const Register Reg = MO->getReg(); // Both use and def operands can read a register. if (MO->readsReg()) { @@ -2043,8 +2049,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts && !LiveInts->isNotInMIMap(*MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI); // Check the cached regunit intervals. 
- if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) { - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (Reg.isPhysical() && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) { if (MRI->isReservedRegUnit(*Units)) continue; if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) @@ -2190,9 +2197,9 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { // Kill any masked registers. while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); - for (unsigned Reg : regsLive) - if (Register::isPhysicalRegister(Reg) && - MachineOperand::clobbersPhysReg(Mask, Reg)) + for (Register Reg : regsLive) + if (Reg.isPhysical() && + MachineOperand::clobbersPhysReg(Mask, Reg.asMCReg())) regsDead.push_back(Reg); } set_subtract(regsLive, regsDead); regsDead.clear(); @@ -2225,7 +2232,7 @@ struct VRegFilter { // Add elements to the filter itself. \pre Input set \p FromRegSet must have // no duplicates. Both virtual and physical registers are fine. template void add(const RegSetT &FromRegSet) { - SmallVector VRegsBuffer; + SmallVector VRegsBuffer; filterAndAdd(FromRegSet, VRegsBuffer); } // Filter \p FromRegSet through the filter and append passed elements into \p @@ -2233,13 +2240,13 @@ struct VRegFilter { // \returns true if anything changed. 
template bool filterAndAdd(const RegSetT &FromRegSet, - SmallVectorImpl &ToVRegs) { + SmallVectorImpl &ToVRegs) { unsigned SparseUniverse = Sparse.size(); unsigned NewSparseUniverse = SparseUniverse; unsigned NewDenseSize = Dense.size(); size_t Begin = ToVRegs.size(); - for (unsigned Reg : FromRegSet) { - if (!Register::isVirtualRegister(Reg)) + for (Register Reg : FromRegSet) { + if (!Reg.isVirtual()) continue; unsigned Index = Register::virtReg2Index(Reg); if (Index < SparseUniverseMax) { @@ -2263,7 +2270,7 @@ struct VRegFilter { Sparse.resize(NewSparseUniverse); Dense.reserve(NewDenseSize); for (unsigned I = Begin; I < End; ++I) { - unsigned Reg = ToVRegs[I]; + Register Reg = ToVRegs[I]; unsigned Index = Register::virtReg2Index(Reg); if (Index < SparseUniverseMax) Sparse.set(Index); @@ -2296,7 +2303,7 @@ struct VRegFilter { // universe). filter_b implicitly contains all physical registers at all times. class FilteringVRegSet { VRegFilter Filter; - SmallVector VRegs; + SmallVector VRegs; public: // Set-up the filter_b. \pre Input register set \p RS must have no duplicates. @@ -2474,7 +2481,7 @@ void MachineVerifier::visitMachineFunctionAfter() { // Check for killed virtual registers that should be live out. 
for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; - for (unsigned VReg : MInfo.vregsRequired) + for (Register VReg : MInfo.vregsRequired) if (MInfo.regsKilled.count(VReg)) { report("Virtual register killed in block, but needed live out.", &MBB); errs() << "Virtual register " << printReg(VReg) @@ -2484,7 +2491,7 @@ void MachineVerifier::visitMachineFunctionAfter() { if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; - for (unsigned VReg : MInfo.vregsRequired) { + for (Register VReg : MInfo.vregsRequired) { report("Virtual register defs don't dominate all uses.", MF); report_context_vreg(VReg); } @@ -2543,8 +2550,8 @@ void MachineVerifier::visitMachineFunctionAfter() { void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; @@ -2569,8 +2576,8 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); // Spilling and splitting may leave unused registers around. Skip them. 
if (MRI->reg_nodbg_empty(Reg)) @@ -2594,7 +2601,7 @@ void MachineVerifier::verifyLiveIntervals() { } void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, - const VNInfo *VNI, unsigned Reg, + const VNInfo *VNI, Register Reg, LaneBitmask LaneMask) { if (VNI->isUnused()) return; @@ -2687,8 +2694,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg, LaneBitmask LaneMask) -{ + Register Reg, + LaneBitmask LaneMask) { const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); @@ -2899,7 +2906,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } } -void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, +void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg, LaneBitmask LaneMask) { for (const VNInfo *VNI : LR.valnos) verifyLiveRangeValue(LR, VNI, Reg, LaneMask); @@ -2909,7 +2916,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - unsigned Reg = LI.reg(); + Register Reg = LI.reg(); assert(Register::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); From d9f91a3d14526fbc1d8cc9b98dbd8a093acf37d0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 20 Oct 2020 21:03:58 -0700 Subject: [PATCH 005/179] Revert D89381 "[SCEV] Recommit "Use nw flag and symbolic iteration count to sharpen ranges of AddRecs", attempt 2" This reverts commit a10a64e7e334dc878d281aba9a46f751fe606567. It broke polly/test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll The difference suggests that this may be a serious issue. 
--- llvm/include/llvm/Analysis/ScalarEvolution.h | 7 -- llvm/lib/Analysis/ScalarEvolution.cpp | 75 ------------------- .../no-wrap-symbolic-becount.ll | 4 +- .../IndVarSimplify/X86/eliminate-trunc.ll | 2 +- .../promote-iv-to-eliminate-casts.ll | 8 +- 5 files changed, 7 insertions(+), 89 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 691c424a280c5..85c04a79ba451 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1497,13 +1497,6 @@ class ScalarEvolution { ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop, const SCEV *MaxBECount, unsigned BitWidth); - /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p - /// Start,+,\p Stop}. - ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec, - const SCEV *MaxBECount, - unsigned BitWidth, - RangeSignHint SignHint); - /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p /// Stop} by "factoring out" a ternary expression from the add recurrence. /// Helper called by \c getRange. diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index b9c6cf8f13380..efc4600e248f2 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5527,17 +5527,6 @@ ScalarEvolution::getRangeRef(const SCEV *S, ConservativeResult = ConservativeResult.intersectWith(RangeFromFactoring, RangeType); } - - // Now try symbolic BE count and more powerful methods. 
- MaxBECount = computeMaxBackedgeTakenCount(AddRec->getLoop()); - if (!isa(MaxBECount) && - getTypeSizeInBits(MaxBECount->getType()) <= BitWidth && - AddRec->hasNoSelfWrap()) { - auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR( - AddRec, MaxBECount, BitWidth, SignHint); - ConservativeResult = - ConservativeResult.intersectWith(RangeFromAffineNew, RangeType); - } } return setRange(AddRec, SignHint, std::move(ConservativeResult)); @@ -5707,70 +5696,6 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, return SR.intersectWith(UR, ConstantRange::Smallest); } -ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR( - const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, - ScalarEvolution::RangeSignHint SignHint) { - assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n"); - assert(AddRec->hasNoSelfWrap() && - "This only works for non-self-wrapping AddRecs!"); - const bool IsSigned = SignHint == HINT_RANGE_SIGNED; - const SCEV *Step = AddRec->getStepRecurrence(*this); - // Let's make sure that we can prove that we do not self-wrap during - // MaxBECount iterations. We need this because MaxBECount is a maximum - // iteration count estimate, and we might infer nw from some exit for which we - // do not know max exit count (or any other side reasoning). - // TODO: Turn into assert at some point. - MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType()); - const SCEV *RangeWidth = getNegativeSCEV(getOne(AddRec->getType())); - const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step)); - const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs); - if (!isKnownPredicate(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap)) - return ConstantRange::getFull(BitWidth); - - ICmpInst::Predicate LEPred = - IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; - ICmpInst::Predicate GEPred = - IsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; - const SCEV *Start = AddRec->getStart(); - const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); - - // We know that there is no self-wrap. Let's take Start and End values and - // look at all intermediate values V1, V2, ..., Vn that IndVar takes during - // the iteration. They either lie inside the range [Min(Start, End), - // Max(Start, End)] or outside it: - // - // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax; - // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax; - // - // No self wrap flag guarantees that the intermediate values cannot be BOTH - // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that - // knowledge, let's try to prove that we are dealing with Case 1. It is so if - // Start <= End and step is positive, or Start >= End and step is negative. - ConstantRange StartRange = - IsSigned ? getSignedRange(Start) : getUnsignedRange(Start); - ConstantRange EndRange = - IsSigned ? getSignedRange(End) : getUnsignedRange(End); - ConstantRange RangeBetween = StartRange.unionWith(EndRange); - // If they already cover full iteration space, we will know nothing useful - // even if we prove what we want to prove. - if (RangeBetween.isFullSet()) - return RangeBetween; - // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax). - bool IsWrappingRange = - IsSigned ? 
RangeBetween.getLower().sge(RangeBetween.getUpper()) - : RangeBetween.getLower().uge(RangeBetween.getUpper()); - if (IsWrappingRange) - return ConstantRange::getFull(BitWidth); - - if (isKnownPositive(Step) && - isKnownPredicateViaConstantRanges(LEPred, Start, End)) - return RangeBetween; - else if (isKnownNegative(Step) && - isKnownPredicateViaConstantRanges(GEPred, Start, End)) - return RangeBetween; - return ConstantRange::getFull(BitWidth); -} - ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll index b49ec80c6d289..77c1017edb1a6 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll @@ -7,7 +7,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) { ; CHECK-NEXT: %0 = zext i32 %start to i64 ; CHECK-NEXT: --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296) ; CHECK-NEXT: %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ] -; CHECK-NEXT: --> {(zext i32 %start to i64),+,-1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(zext i32 %start to i64),+,-1}<%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ] ; CHECK-NEXT: --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i32 %iv, -1 @@ -21,7 +21,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) { ; CHECK-NEXT: %stop = load i32, i32* %load.addr, align 4 ; CHECK-NEXT: --> %stop U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, -1 -; CHECK-NEXT: --> 
{(-1 + (zext i32 %start to i64)),+,-1}<%loop> U: [-4294967296,4294967295) S: [-1,4294967295) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(-1 + (zext i32 %start to i64)),+,-1}<%loop> U: [-4294967296,4294967295) S: [-4294967296,4294967295) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_01 ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: exit count for loop: (zext i32 %start to i64) diff --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll index 4738369c0aca9..9fb7977c207f5 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -474,7 +474,7 @@ define void @test_10(i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90 ; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMIN]], -99 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll index 090efa44d9c04..5cc288c58e68c 100644 --- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll +++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll @@ -196,7 +196,7 @@ define void @promote_latch_condition_decrementing_loop_01(i32* %p, i32* %a) { ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ] ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4 -; 
CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]] ; @@ -241,7 +241,7 @@ define void @promote_latch_condition_decrementing_loop_02(i32* %p, i32* %a) { ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]] ; @@ -285,7 +285,7 @@ define void @promote_latch_condition_decrementing_loop_03(i32* %p, i32* %a) { ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]] ; @@ -336,7 +336,7 @@ define void @promote_latch_condition_decrementing_loop_04(i32* %p, i32* %a, i1 % ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ] ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4 -; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]] ; From 
79a69f558f9fa6728da7354d4b30a97f0c945a58 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 20 Oct 2020 20:59:27 -0700 Subject: [PATCH 006/179] [X86] Error on using h-registers with REX prefix in the assembler instead of leaving it to a fatal error in the encoder. Using a fatal error is bad for user experience. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D89837 --- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 31 +++++++++++++++++-- llvm/test/MC/X86/encoder-fail.s | 19 ++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index a077179cfd817..cb08b7de9afb2 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1210,8 +1210,6 @@ bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName, // FIXME: This should be done using Requires and // Requires so "eiz" usage in 64-bit instructions can be also // checked. - // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a - // REX prefix. if (RegNo == X86::RIZ || RegNo == X86::RIP || X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || X86II::isX86_64NonExtLowByteReg(RegNo) || @@ -3619,6 +3617,33 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { } } + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to + // check this with the legacy encoding, VEX/EVEX/XOP don't use REX. 
+ if ((MCID.TSFlags & X86II::EncodingMask) == 0) { + MCPhysReg HReg = X86::NoRegister; + bool UsesRex = MCID.TSFlags & X86II::REX_W; + unsigned NumOps = Inst.getNumOperands(); + for (unsigned i = 0; i != NumOps; ++i) { + const MCOperand &MO = Inst.getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) + HReg = Reg; + if (X86II::isX86_64NonExtLowByteReg(Reg) || + X86II::isX86_64ExtendedReg(Reg)) + UsesRex = true; + } + + if (UsesRex && HReg != X86::NoRegister) { + StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg); + return Error(Ops[0]->getStartLoc(), + "can't encode '" + RegName + "' in an instruction requiring " + "REX prefix."); + } + } + return false; } @@ -3989,6 +4014,8 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned NumSuccessfulMatches = std::count(std::begin(Match), std::end(Match), Match_Success); if (NumSuccessfulMatches == 1) { + if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) + return true; // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. diff --git a/llvm/test/MC/X86/encoder-fail.s b/llvm/test/MC/X86/encoder-fail.s index d8d321fa8a1a7..2ca74a41c914c 100644 --- a/llvm/test/MC/X86/encoder-fail.s +++ b/llvm/test/MC/X86/encoder-fail.s @@ -1,3 +1,16 @@ -// RUN: not --crash llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2>&1 | FileCheck %s -// CHECK: LLVM ERROR: Cannot encode high byte register in REX-prefixed instruction - movzx %dh, %rsi +// RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2>&1 | FileCheck %s + +// CHECK: error: can't encode 'dh' in an instruction requiring REX prefix. +movzx %dh, %rsi + +// CHECK: error: can't encode 'ah' in an instruction requiring REX prefix. 
+movzx %ah, %r8d + +// CHECK: error: can't encode 'bh' in an instruction requiring REX prefix. +add %bh, %sil + +// CHECK: error: can't encode 'ch' in an instruction requiring REX prefix. +mov %ch, (%r8) + +// CHECK: error: can't encode 'dh' in an instruction requiring REX prefix. +mov %dh, (%rax,%r8) From 80852a4f2fb154c6094bb9d9e3457757d5a60ad1 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 21 Oct 2020 12:42:40 +0700 Subject: [PATCH 007/179] [SCEV] Prove implications of different type via truncation When we need to prove implication of expressions of different type width, the default strategy is to widen everything to wider type and prove in this type. This does not interact well with AddRecs with negative steps and unsigned predicates: such AddRec will likely not have a `nuw` flag, and its `zext` to wider type will not be an AddRec. In contraty, `trunc` of an AddRec in some cases can easily be proved to be an `AddRec` too. This patch introduces an alternative way to handling implications of different type widths. If we can prove that wider type values actually fit in the narrow type, we truncate them and prove the implication in narrow type. Differential Revision: https://reviews.llvm.org/D89548 Reviewed By: fhahn --- llvm/lib/Analysis/ScalarEvolution.cpp | 19 +++++++++ llvm/test/Analysis/ScalarEvolution/srem.ll | 2 +- .../Analysis/ScalarEvolutionTest.cpp | 41 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index efc4600e248f2..6e351a53628fc 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9699,6 +9699,25 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Balance the types. 
if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { + // For unsigned and equality predicates, try to prove that both found + // operands fit into narrow unsigned range. If so, try to prove facts in + // narrow types. + if (!CmpInst::isSigned(FoundPred)) { + auto *NarrowType = LHS->getType(); + auto *WideType = FoundLHS->getType(); + auto BitWidth = getTypeSizeInBits(NarrowType); + const SCEV *MaxValue = getZeroExtendExpr( + getConstant(APInt::getMaxValue(BitWidth)), WideType); + if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) && + isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) { + const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType); + const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType); + if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS, + TruncFoundRHS, Context)) + return true; + } + } + if (CmpInst::isSigned(Pred)) { LHS = getSignExtendExpr(LHS, FoundLHS->getType()); RHS = getSignExtendExpr(RHS, FoundLHS->getType()); diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll index 197437b51ca12..76e0d4a5ec5b5 100644 --- a/llvm/test/Analysis/ScalarEvolution/srem.ll +++ b/llvm/test/Analysis/ScalarEvolution/srem.ll @@ -29,7 +29,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 { ; CHECK-NEXT: %add = add nsw i32 %2, %call ; CHECK-NEXT: --> (%2 + %call) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 -; CHECK-NEXT: --> {1,+,1}<%for.cond> U: [1,0) S: [1,0) Exits: (1 + %width) LoopDispositions: { %for.cond: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.cond> U: full-set S: full-set Exits: (1 + %width) LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: Determining loop execution counts for: @_Z4loopi ; CHECK-NEXT: Loop %for.cond: backedge-taken count is %width ; CHECK-NEXT: Loop %for.cond: max backedge-taken count is -1 diff --git 
a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index be8941838f71a..ee70fe5e7ce5b 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1316,4 +1316,45 @@ TEST_F(ScalarEvolutionsTest, UnsignedIsImpliedViaOperations) { }); } +TEST_F(ScalarEvolutionsTest, ProveImplicationViaNarrowing) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString( + "define i32 @foo(i32 %start, i32* %q) { " + "entry: " + " %wide.start = zext i32 %start to i64 " + " br label %loop " + "loop: " + " %wide.iv = phi i64 [%wide.start, %entry], [%wide.iv.next, %backedge] " + " %iv = phi i32 [%start, %entry], [%iv.next, %backedge] " + " %cond = icmp eq i64 %wide.iv, 0 " + " br i1 %cond, label %exit, label %backedge " + "backedge: " + " %iv.next = add i32 %iv, -1 " + " %index = zext i32 %iv.next to i64 " + " %load.addr = getelementptr i32, i32* %q, i64 %index " + " %stop = load i32, i32* %load.addr " + " %loop.cond = icmp eq i32 %stop, 0 " + " %wide.iv.next = add nsw i64 %wide.iv, -1 " + " br i1 %loop.cond, label %loop, label %failure " + "exit: " + " ret i32 0 " + "failure: " + " unreachable " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *IV = SE.getSCEV(getInstructionByName(F, "iv")); + auto *Zero = SE.getZero(IV->getType()); + auto *Backedge = getInstructionByName(F, "iv.next")->getParent(); + ASSERT_TRUE(Backedge); + EXPECT_TRUE(SE.isBasicBlockEntryGuardedByCond(Backedge, ICmpInst::ICMP_UGT, + IV, Zero)); + }); +} + } // end namespace llvm From bed02fa8b0ec61b442990fb2406f0157fd3f8c13 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 21 Oct 2020 13:03:46 +0700 Subject: [PATCH 008/179] Revert "[SCEV] Prove implications of different type via truncation" This reverts commit 
80852a4f2fb154c6094bb9d9e3457757d5a60ad1. Test is now broken because underlying required patch was also reverted SUDDENLY. --- llvm/lib/Analysis/ScalarEvolution.cpp | 19 --------- llvm/test/Analysis/ScalarEvolution/srem.ll | 2 +- .../Analysis/ScalarEvolutionTest.cpp | 41 ------------------- 3 files changed, 1 insertion(+), 61 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 6e351a53628fc..efc4600e248f2 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9699,25 +9699,6 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { - // For unsigned and equality predicates, try to prove that both found - // operands fit into narrow unsigned range. If so, try to prove facts in - // narrow types. - if (!CmpInst::isSigned(FoundPred)) { - auto *NarrowType = LHS->getType(); - auto *WideType = FoundLHS->getType(); - auto BitWidth = getTypeSizeInBits(NarrowType); - const SCEV *MaxValue = getZeroExtendExpr( - getConstant(APInt::getMaxValue(BitWidth)), WideType); - if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) && - isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) { - const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType); - const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType); - if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS, - TruncFoundRHS, Context)) - return true; - } - } - if (CmpInst::isSigned(Pred)) { LHS = getSignExtendExpr(LHS, FoundLHS->getType()); RHS = getSignExtendExpr(RHS, FoundLHS->getType()); diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll index 76e0d4a5ec5b5..197437b51ca12 100644 --- a/llvm/test/Analysis/ScalarEvolution/srem.ll +++ b/llvm/test/Analysis/ScalarEvolution/srem.ll @@ -29,7 +29,7 @@ define 
dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 { ; CHECK-NEXT: %add = add nsw i32 %2, %call ; CHECK-NEXT: --> (%2 + %call) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 -; CHECK-NEXT: --> {1,+,1}<%for.cond> U: full-set S: full-set Exits: (1 + %width) LoopDispositions: { %for.cond: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.cond> U: [1,0) S: [1,0) Exits: (1 + %width) LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: Determining loop execution counts for: @_Z4loopi ; CHECK-NEXT: Loop %for.cond: backedge-taken count is %width ; CHECK-NEXT: Loop %for.cond: max backedge-taken count is -1 diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index ee70fe5e7ce5b..be8941838f71a 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1316,45 +1316,4 @@ TEST_F(ScalarEvolutionsTest, UnsignedIsImpliedViaOperations) { }); } -TEST_F(ScalarEvolutionsTest, ProveImplicationViaNarrowing) { - LLVMContext C; - SMDiagnostic Err; - std::unique_ptr M = parseAssemblyString( - "define i32 @foo(i32 %start, i32* %q) { " - "entry: " - " %wide.start = zext i32 %start to i64 " - " br label %loop " - "loop: " - " %wide.iv = phi i64 [%wide.start, %entry], [%wide.iv.next, %backedge] " - " %iv = phi i32 [%start, %entry], [%iv.next, %backedge] " - " %cond = icmp eq i64 %wide.iv, 0 " - " br i1 %cond, label %exit, label %backedge " - "backedge: " - " %iv.next = add i32 %iv, -1 " - " %index = zext i32 %iv.next to i64 " - " %load.addr = getelementptr i32, i32* %q, i64 %index " - " %stop = load i32, i32* %load.addr " - " %loop.cond = icmp eq i32 %stop, 0 " - " %wide.iv.next = add nsw i64 %wide.iv, -1 " - " br i1 %loop.cond, label %loop, label %failure " - "exit: " - " ret i32 0 " - "failure: " - " unreachable " - "} ", - Err, C); - - ASSERT_TRUE(M && "Could not parse module?"); - 
ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); - - runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { - auto *IV = SE.getSCEV(getInstructionByName(F, "iv")); - auto *Zero = SE.getZero(IV->getType()); - auto *Backedge = getInstructionByName(F, "iv.next")->getParent(); - ASSERT_TRUE(Backedge); - EXPECT_TRUE(SE.isBasicBlockEntryGuardedByCond(Backedge, ICmpInst::ICMP_UGT, - IV, Zero)); - }); -} - } // end namespace llvm From 9fbb060418e00ed7ee3cc88fdd9b90d78b2623a5 Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Wed, 21 Oct 2020 06:38:22 +0000 Subject: [PATCH 009/179] [NFC][PowerPC]Add tests for folding RLWINM before and after RA. --- llvm/test/CodeGen/PowerPC/fold-rlwinm.mir | 19 +++++++++++++ llvm/test/CodeGen/PowerPC/vsx_builtins.ll | 34 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir index ded5329a46cc9..7a220964b0fd1 100644 --- a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir +++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir @@ -182,3 +182,22 @@ body: | STW %2:gprc, %0:gprc, 100 BLR8 implicit $lr8, implicit $rm ... +--- +name: testFoldRLWINMAndANDI +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + ; CHECK-LABEL: name: testFoldRLWINMAndANDI + ; CHECK: liveins: $x3 + ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x3 + ; CHECK: [[COPY1:%[0-9]+]]:gprc = COPY [[COPY]].sub_32 + ; CHECK: [[RLWINM:%[0-9]+]]:gprc = RLWINM [[COPY1]], 4, 28, 31 + ; CHECK: [[ANDI_rec:%[0-9]+]]:gprc = ANDI_rec [[RLWINM]], 4, implicit-def $cr0 + ; CHECK: BLR8 implicit $lr8, implicit $rm + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 4, 28, 31 + %3:gprc = ANDI_rec %2:gprc, 4, implicit-def $cr0 + BLR8 implicit $lr8, implicit $rm +... 
diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll index 2ab747384b698..0aae50af26490 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll @@ -106,3 +106,37 @@ define i32 @test_vec_test_swsqrts(<4 x float> %a) { ret i32 %0 } declare i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float>) + +define i32 @xvtdivdp_andi(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: xvtdivdp_andi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtdivdp cr0, v2, v3 +; CHECK-NEXT: li r4, 222 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: andi. r3, r3, 2 +; CHECK-NEXT: li r3, 22 +; CHECK-NEXT: iseleq r3, r4, r3 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b) + %1 = and i32 %0, 2 + %cmp.not = icmp eq i32 %1, 0 + %retval.0 = select i1 %cmp.not, i32 222, i32 22 + ret i32 %retval.0 +} + +define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: xvtdivdp_shift: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtdivdp cr0, v2, v3 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: rlwinm r3, r3, 28, 31, 31 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b) + %1 = lshr i32 %0, 4 + %.lobit = and i32 %1, 1 + ret i32 %.lobit +} From 4de215ff187746ff02ffdc6bd56bb54d42cbfdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 21 Oct 2020 09:33:51 +0300 Subject: [PATCH 010/179] Revert "[InstCombine] Add or((icmp ult/ule (A + C1), C3), (icmp ult/ule (A + C2), C3)) uniform vector support" Also revert "[InstCombine] foldOrOfICmps - use m_Specific instead of explicit comparisons. NFCI." to make the primarily intended revert work. This reverts commits ce13549761b6a22263e051dda09ef5122435008b and e372a5f86f6488bb0c2593a665d51fdd3a97c6e4. This commit caused failed asserts e.g. 
like this: $ cat repro.cpp bool a(char b) { return b >= '0' && b <= '9' || (b | 32) >= 'a' && (b | 32) <= 'z'; $ clang++ -target x86_64-linux-gnu -c -O2 repro.cpp clang++: ../include/llvm/ADT/APInt.h:1151: bool llvm::APInt::operator==(const llvm::APInt&) const: Assertion `BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"' failed. --- .../InstCombine/InstCombineAndOrXor.cpp | 59 +++++++++---------- llvm/test/Transforms/InstCombine/or.ll | 10 ++-- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 9c9b3f4dc8992..b34ba4e7908f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2283,6 +2283,8 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1); + auto *LHSC = dyn_cast(LHS1); + auto *RHSC = dyn_cast(RHS1); // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) @@ -2294,43 +2296,42 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // 3) C1 ^ C2 is one-bit mask. // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask. // This implies all values in the two ranges differ by exactly one bit. 
- const APInt *LHSVal, *RHSVal; if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) && - PredL == PredR && LHS->getType() == RHS->getType() && - LHS->getType()->isIntOrIntVectorTy() && match(LHS1, m_APInt(LHSVal)) && - match(RHS1, m_APInt(RHSVal)) && *LHSVal == *RHSVal && LHS->hasOneUse() && - RHS->hasOneUse()) { - Value *AddOpnd; - const APInt *LAddVal, *RAddVal; - if (match(LHS0, m_Add(m_Value(AddOpnd), m_APInt(LAddVal))) && - match(RHS0, m_Add(m_Specific(AddOpnd), m_APInt(RAddVal))) && - LAddVal->ugt(*LHSVal) && RAddVal->ugt(*LHSVal)) { - - APInt DiffC = *LAddVal ^ *RAddVal; - if (DiffC.isPowerOf2()) { - const APInt *MaxAddC = nullptr; - if (LAddVal->ult(*RAddVal)) - MaxAddC = RAddVal; + PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() && + LHSC->getType() == RHSC->getType() && + LHSC->getValue() == (RHSC->getValue())) { + + Value *LAddOpnd, *RAddOpnd; + ConstantInt *LAddC, *RAddC; + if (match(LHS0, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddC))) && + match(RHS0, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddC))) && + LAddC->getValue().ugt(LHSC->getValue()) && + RAddC->getValue().ugt(LHSC->getValue())) { + + APInt DiffC = LAddC->getValue() ^ RAddC->getValue(); + if (LAddOpnd == RAddOpnd && DiffC.isPowerOf2()) { + ConstantInt *MaxAddC = nullptr; + if (LAddC->getValue().ult(RAddC->getValue())) + MaxAddC = RAddC; else - MaxAddC = LAddVal; + MaxAddC = LAddC; - APInt RRangeLow = -*RAddVal; - APInt RRangeHigh = RRangeLow + *LHSVal; - APInt LRangeLow = -*LAddVal; - APInt LRangeHigh = LRangeLow + *LHSVal; + APInt RRangeLow = -RAddC->getValue(); + APInt RRangeHigh = RRangeLow + LHSC->getValue(); + APInt LRangeLow = -LAddC->getValue(); + APInt LRangeHigh = LRangeLow + LHSC->getValue(); APInt LowRangeDiff = RRangeLow ^ LRangeLow; APInt HighRangeDiff = RRangeHigh ^ LRangeHigh; APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? 
LRangeLow - RRangeLow : RRangeLow - LRangeLow; if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff && - RangeDiff.ugt(*LHSVal)) { - Value *NewAnd = Builder.CreateAnd( - AddOpnd, ConstantInt::get(LHS0->getType(), ~DiffC)); - Value *NewAdd = Builder.CreateAdd( - NewAnd, ConstantInt::get(LHS0->getType(), *MaxAddC)); - return Builder.CreateICmp(LHS->getPredicate(), NewAdd, - ConstantInt::get(LHS0->getType(), *LHSVal)); + RangeDiff.ugt(LHSC->getValue())) { + Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); + + Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC); + Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); + return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } } } @@ -2416,8 +2417,6 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, } // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - auto *LHSC = dyn_cast(LHS1); - auto *RHSC = dyn_cast(RHS1); if (!LHSC || !RHSC) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 7e4115cc8934d..b5e3af2c76525 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -650,10 +650,12 @@ define i1 @test46(i8 signext %c) { define <2 x i1> @test46_uniform(<2 x i8> %c) { ; CHECK-LABEL: @test46_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <2 x i8> [[TMP2]], -; CHECK-NEXT: ret <2 x i1> [[TMP3]] +; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], +; CHECK-NEXT: [[C_OFF17:%.*]] = add <2 x i8> [[C]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret <2 x i1> [[OR]] ; %c.off = add <2 x i8> %c, %cmp1 = icmp ult <2 x i8> %c.off, From b4a289b03ced24799da818f7da4cb0febe35ac4b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 16 Oct 2020 12:49:01 +0300 Subject: [PATCH 011/179] [libcxx] [test] Split the file_time_type synopsis test Split the resolution check to a separate test, which is marked as unsupported on windows. On windows (both with MS STL and libstdc++), the file time has 100 ns resolution; the standard doesn't mandate a specific resolution. Differential Revision: https://reviews.llvm.org/D89535 --- .../file_time_type.pass.cpp | 8 ------ ...file_time_type_resolution.compile.pass.cpp | 27 +++++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type_resolution.compile.pass.cpp diff --git a/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type.pass.cpp index 09b8aed5a5c8e..afdf0ec15b943 100644 --- a/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type.pass.cpp @@ -32,16 +32,8 @@ void test_trivial_clock() { ((void)odr_use); } -void test_time_point_resolution_and_range() { - using namespace fs; - using Dur = file_time_type::duration; - using Period = Dur::period; - ASSERT_SAME_TYPE(Period, std::nano); -} - int main(int, char**) { test_trivial_clock(); - test_time_point_resolution_and_range(); return 0; } diff --git a/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type_resolution.compile.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type_resolution.compile.pass.cpp new file mode 100644 index 0000000000000..965e7a4620b87 --- /dev/null +++ b/libcxx/test/std/input.output/filesystems/fs.filesystem.synopsis/file_time_type_resolution.compile.pass.cpp @@ -0,0 +1,27 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, windows + +// MS STL and libstdc++ use the native windows file timestamp resolution, +// with 100 ns resolution. + +// + +// typedef TrivialClock file_time_type; + +#include "filesystem_include.h" +#include +#include + +#include "test_macros.h" + +using namespace fs; +using Dur = file_time_type::duration; +using Period = Dur::period; +ASSERT_SAME_TYPE(Period, std::nano); From f5815105d2762d68000996bff9041216cd7b732e Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 9 Oct 2020 14:29:15 +0100 Subject: [PATCH 012/179] [SVE] Remove reliance on TypeSize comparison operators in unit tests The EXPECT_XY comparison functions all rely upon using the existing TypeSize comparison operators, which we are deprecating in favour of isKnownXY. I've changed all such cases to compare either the known minimum size or the fixed size. 
Differential Revision: https://reviews.llvm.org/D89531 --- .../CodeGen/ScalableVectorMVTsTest.cpp | 22 ++++++++++------- llvm/unittests/IR/VectorTypesTest.cpp | 24 +++++++++---------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp index 48b950fa74e9f..566b2e75c70c5 100644 --- a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp +++ b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp @@ -139,19 +139,23 @@ TEST(ScalableVectorMVTsTest, SizeQueries) { EXPECT_EQ(nxv4i32.getSizeInBits(), nxv2i64.getSizeInBits()); EXPECT_EQ(nxv2f64.getSizeInBits(), nxv2i64.getSizeInBits()); EXPECT_NE(nxv2i32.getSizeInBits(), nxv4i32.getSizeInBits()); - EXPECT_LT(nxv2i32.getSizeInBits(), nxv2i64.getSizeInBits()); - EXPECT_LE(nxv4i32.getSizeInBits(), nxv2i64.getSizeInBits()); - EXPECT_GT(nxv4i32.getSizeInBits(), nxv2i32.getSizeInBits()); - EXPECT_GE(nxv2i64.getSizeInBits(), nxv4i32.getSizeInBits()); + EXPECT_LT(nxv2i32.getSizeInBits().getKnownMinSize(), + nxv2i64.getSizeInBits().getKnownMinSize()); + EXPECT_LE(nxv4i32.getSizeInBits().getKnownMinSize(), + nxv2i64.getSizeInBits().getKnownMinSize()); + EXPECT_GT(nxv4i32.getSizeInBits().getKnownMinSize(), + nxv2i32.getSizeInBits().getKnownMinSize()); + EXPECT_GE(nxv2i64.getSizeInBits().getKnownMinSize(), + nxv4i32.getSizeInBits().getKnownMinSize()); // Check equivalence and ordering on fixed types. 
EXPECT_EQ(v4i32.getSizeInBits(), v2i64.getSizeInBits()); EXPECT_EQ(v2f64.getSizeInBits(), v2i64.getSizeInBits()); EXPECT_NE(v2i32.getSizeInBits(), v4i32.getSizeInBits()); - EXPECT_LT(v2i32.getSizeInBits(), v2i64.getSizeInBits()); - EXPECT_LE(v4i32.getSizeInBits(), v2i64.getSizeInBits()); - EXPECT_GT(v4i32.getSizeInBits(), v2i32.getSizeInBits()); - EXPECT_GE(v2i64.getSizeInBits(), v4i32.getSizeInBits()); + EXPECT_LT(v2i32.getFixedSizeInBits(), v2i64.getFixedSizeInBits()); + EXPECT_LE(v4i32.getFixedSizeInBits(), v2i64.getFixedSizeInBits()); + EXPECT_GT(v4i32.getFixedSizeInBits(), v2i32.getFixedSizeInBits()); + EXPECT_GE(v2i64.getFixedSizeInBits(), v4i32.getFixedSizeInBits()); // Check that scalable and non-scalable types with the same minimum size // are not considered equal. @@ -159,7 +163,7 @@ TEST(ScalableVectorMVTsTest, SizeQueries) { ASSERT_FALSE(v2i64.getSizeInBits() == nxv2f64.getSizeInBits()); // Check that we can obtain a known-exact size from a non-scalable type. - EXPECT_EQ(v4i32.getSizeInBits(), 128U); + EXPECT_EQ(v4i32.getFixedSizeInBits(), 128U); EXPECT_EQ(v2i64.getFixedSizeInBits(), 128U); // Check that we can query the known minimum size for both scalable and diff --git a/llvm/unittests/IR/VectorTypesTest.cpp b/llvm/unittests/IR/VectorTypesTest.cpp index 37137053bf157..c8a09808428fa 100644 --- a/llvm/unittests/IR/VectorTypesTest.cpp +++ b/llvm/unittests/IR/VectorTypesTest.cpp @@ -286,10 +286,10 @@ TEST(VectorTypesTest, FixedLenComparisons) { EXPECT_EQ(V2I32Len.getKnownMinSize(), 64U); EXPECT_FALSE(V2I32Len.isScalable()); - EXPECT_LT(V2Int32Ty->getPrimitiveSizeInBits(), - V4Int32Ty->getPrimitiveSizeInBits()); - EXPECT_GT(V2Int64Ty->getPrimitiveSizeInBits(), - V2Int32Ty->getPrimitiveSizeInBits()); + EXPECT_LT(V2Int32Ty->getPrimitiveSizeInBits().getFixedSize(), + V4Int32Ty->getPrimitiveSizeInBits().getFixedSize()); + EXPECT_GT(V2Int64Ty->getPrimitiveSizeInBits().getFixedSize(), + V2Int32Ty->getPrimitiveSizeInBits().getFixedSize()); 
EXPECT_EQ(V4Int32Ty->getPrimitiveSizeInBits(), V2Int64Ty->getPrimitiveSizeInBits()); EXPECT_NE(V2Int32Ty->getPrimitiveSizeInBits(), @@ -332,14 +332,14 @@ TEST(VectorTypesTest, ScalableComparisons) { EXPECT_EQ(ScV2I32Len.getKnownMinSize(), 64U); EXPECT_TRUE(ScV2I32Len.isScalable()); - EXPECT_LT(ScV2Int32Ty->getPrimitiveSizeInBits(), - ScV4Int32Ty->getPrimitiveSizeInBits()); - EXPECT_GT(ScV2Int64Ty->getPrimitiveSizeInBits(), - ScV2Int32Ty->getPrimitiveSizeInBits()); - EXPECT_EQ(ScV4Int32Ty->getPrimitiveSizeInBits(), - ScV2Int64Ty->getPrimitiveSizeInBits()); - EXPECT_NE(ScV2Int32Ty->getPrimitiveSizeInBits(), - ScV2Int64Ty->getPrimitiveSizeInBits()); + EXPECT_LT(ScV2Int32Ty->getPrimitiveSizeInBits().getKnownMinSize(), + ScV4Int32Ty->getPrimitiveSizeInBits().getKnownMinSize()); + EXPECT_GT(ScV2Int64Ty->getPrimitiveSizeInBits().getKnownMinSize(), + ScV2Int32Ty->getPrimitiveSizeInBits().getKnownMinSize()); + EXPECT_EQ(ScV4Int32Ty->getPrimitiveSizeInBits().getKnownMinSize(), + ScV2Int64Ty->getPrimitiveSizeInBits().getKnownMinSize()); + EXPECT_NE(ScV2Int32Ty->getPrimitiveSizeInBits().getKnownMinSize(), + ScV2Int64Ty->getPrimitiveSizeInBits().getKnownMinSize()); // Check the DataLayout interfaces. EXPECT_EQ(DL.getTypeSizeInBits(ScV2Int64Ty), From 5b17b323a6179d60c58d5048e0679fbbe6782290 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 30 Sep 2020 13:36:59 +0100 Subject: [PATCH 013/179] [SVE][CodeGen] Replace use of TypeSize comparison operator in CreateStackTemporary We were previously relying upon the TypeSize comparison operators to obtain the maximum size of two types, however use of such operators is being deprecated in favour of making the caller aware that it could be dealing with scalable vector types. I have changed the code to assert that the two types have the same scalable property and thus we can simply take the maximum of the known minimum sizes instead. 
Differential Revision: https://reviews.llvm.org/D88563 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5fe26a6d2abdf..b1e2679d86dc6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2045,7 +2045,14 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); + TypeSize VT1Size = VT1.getStoreSize(); + TypeSize VT2Size = VT2.getStoreSize(); + assert(VT1Size.isScalable() == VT2Size.isScalable() && + "Don't know how to choose the maximum size when creating a stack " + "temporary"); + TypeSize Bytes = + VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size; + Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); From d4d0b41a822bacffa42ecea3c0f4c6980463dd31 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 21 Oct 2020 00:42:39 -0700 Subject: [PATCH 014/179] [X86] Remove period from end of error message in assembler Addresses post-commit feedback from D89837. 
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- llvm/test/MC/X86/encoder-fail.s | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index cb08b7de9afb2..7fd93cff692e5 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3640,7 +3640,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg); return Error(Ops[0]->getStartLoc(), "can't encode '" + RegName + "' in an instruction requiring " - "REX prefix."); + "REX prefix"); } } diff --git a/llvm/test/MC/X86/encoder-fail.s b/llvm/test/MC/X86/encoder-fail.s index 2ca74a41c914c..a8b9f48c8fb70 100644 --- a/llvm/test/MC/X86/encoder-fail.s +++ b/llvm/test/MC/X86/encoder-fail.s @@ -1,16 +1,16 @@ // RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2>&1 | FileCheck %s -// CHECK: error: can't encode 'dh' in an instruction requiring REX prefix. +// CHECK: error: can't encode 'dh' in an instruction requiring REX prefix movzx %dh, %rsi -// CHECK: error: can't encode 'ah' in an instruction requiring REX prefix. +// CHECK: error: can't encode 'ah' in an instruction requiring REX prefix movzx %ah, %r8d -// CHECK: error: can't encode 'bh' in an instruction requiring REX prefix. +// CHECK: error: can't encode 'bh' in an instruction requiring REX prefix add %bh, %sil -// CHECK: error: can't encode 'ch' in an instruction requiring REX prefix. +// CHECK: error: can't encode 'ch' in an instruction requiring REX prefix mov %ch, (%r8) -// CHECK: error: can't encode 'dh' in an instruction requiring REX prefix. 
+// CHECK: error: can't encode 'dh' in an instruction requiring REX prefix mov %dh, (%rax,%r8) From 580915d6a2970022d5b7e05d4587de0fd7126c31 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 20 Oct 2020 11:21:05 +0200 Subject: [PATCH 015/179] [mlir] Expose Value hierarchy to Python bindings Values are ubiquitous in the IR, in particular block argument and operation results are Values. Define Python classes for BlockArgument, OpResult and their common ancestor Value. Define pseudo-container classes for lists of block arguments and operation results, and use these containers to access the corresponding values in blocks and operations. Differential Revision: https://reviews.llvm.org/D89778 --- mlir/include/mlir-c/IR.h | 3 + mlir/lib/Bindings/Python/IRModules.cpp | 209 ++++++++++++++++++++++ mlir/lib/Bindings/Python/IRModules.h | 22 +++ mlir/lib/CAPI/IR/IR.cpp | 2 + mlir/test/Bindings/Python/ir_operation.py | 52 ++++++ 5 files changed, 288 insertions(+) diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index 8161234726477..2aeb306f72567 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -432,6 +432,9 @@ intptr_t mlirOpResultGetResultNumber(MlirValue value); /** Returns the type of the value. */ MlirType mlirValueGetType(MlirValue value); +/** Prints the value to the standard error stream. */ +void mlirValueDump(MlirValue value); + /** Prints a value by sending chunks of the string representation and * forwarding `userData to `callback`. Note that the callback may be called * several times with consecutive chunks of the string. */ diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index 2a768df0ffd9c..0c3e541d18b29 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -85,6 +85,14 @@ static const char kAppendBlockDocstring[] = The created block. )"; +static const char kValueDunderStrDocstring[] = + R"(Returns the string form of the value. 
+ +If the value is a block argument, this is the assembly form of its type and the +position in the argument list. If the value is an operation result, this is +equivalent to printing the operation that produced it. +)"; + //------------------------------------------------------------------------------ // Conversion utilities. //------------------------------------------------------------------------------ @@ -732,6 +740,168 @@ bool PyType::operator==(const PyType &other) { return mlirTypeEqual(type, other.type); } +//------------------------------------------------------------------------------ +// PyValue and subclases. +//------------------------------------------------------------------------------ + +namespace { +/// CRTP base class for Python MLIR values that subclass Value and should be +/// castable from it. The value hierarchy is one level deep and is not supposed +/// to accommodate other levels unless core MLIR changes. +template class PyConcreteValue : public PyValue { +public: + // Derived classes must define statics for: + // IsAFunctionTy isaFunction + // const char *pyClassName + // and redefine bindDerived. + using ClassTy = py::class_; + using IsAFunctionTy = int (*)(MlirValue); + + PyConcreteValue() = default; + PyConcreteValue(PyOperationRef operationRef, MlirValue value) + : PyValue(operationRef, value) {} + PyConcreteValue(PyValue &orig) + : PyConcreteValue(orig.getParentOperation(), castFrom(orig)) {} + + /// Attempts to cast the original value to the derived type and throws on + /// type mismatches. + static MlirValue castFrom(PyValue &orig) { + if (!DerivedTy::isaFunction(orig.get())) { + auto origRepr = py::repr(py::cast(orig)).cast(); + throw SetPyError(PyExc_ValueError, llvm::Twine("Cannot cast value to ") + + DerivedTy::pyClassName + + " (from " + origRepr + ")"); + } + return orig.get(); + } + + /// Binds the Python module objects to functions of this class. 
+ static void bind(py::module &m) { + auto cls = ClassTy(m, DerivedTy::pyClassName); + cls.def(py::init(), py::keep_alive<0, 1>()); + DerivedTy::bindDerived(cls); + } + + /// Implemented by derived classes to add methods to the Python subclass. + static void bindDerived(ClassTy &m) {} +}; + +/// Python wrapper for MlirBlockArgument. +class PyBlockArgument : public PyConcreteValue { +public: + static constexpr IsAFunctionTy isaFunction = mlirValueIsABlockArgument; + static constexpr const char *pyClassName = "BlockArgument"; + using PyConcreteValue::PyConcreteValue; + + static void bindDerived(ClassTy &c) { + c.def_property_readonly("owner", [](PyBlockArgument &self) { + return PyBlock(self.getParentOperation(), + mlirBlockArgumentGetOwner(self.get())); + }); + c.def_property_readonly("arg_number", [](PyBlockArgument &self) { + return mlirBlockArgumentGetArgNumber(self.get()); + }); + c.def("set_type", [](PyBlockArgument &self, PyType type) { + return mlirBlockArgumentSetType(self.get(), type); + }); + } +}; + +/// Python wrapper for MlirOpResult. +class PyOpResult : public PyConcreteValue { +public: + static constexpr IsAFunctionTy isaFunction = mlirValueIsAOpResult; + static constexpr const char *pyClassName = "OpResult"; + using PyConcreteValue::PyConcreteValue; + + static void bindDerived(ClassTy &c) { + c.def_property_readonly("owner", [](PyOpResult &self) { + assert( + mlirOperationEqual(self.getParentOperation()->get(), + mlirOpResultGetOwner(self.get())) && + "expected the owner of the value in Python to match that in the IR"); + return self.getParentOperation(); + }); + c.def_property_readonly("result_number", [](PyOpResult &self) { + return mlirOpResultGetResultNumber(self.get()); + }); + } +}; + +/// A list of block arguments. Internally, these are stored as consecutive +/// elements, random access is cheap. 
The argument list is associated with the +/// operation that contains the block (detached blocks are not allowed in +/// Python bindings) and extends its lifetime. +class PyBlockArgumentList { +public: + PyBlockArgumentList(PyOperationRef operation, MlirBlock block) + : operation(std::move(operation)), block(block) {} + + /// Returns the length of the block argument list. + intptr_t dunderLen() { + operation->checkValid(); + return mlirBlockGetNumArguments(block); + } + + /// Returns `index`-th element of the block argument list. + PyBlockArgument dunderGetItem(intptr_t index) { + if (index < 0 || index >= dunderLen()) { + throw SetPyError(PyExc_IndexError, + "attempt to access out of bounds region"); + } + PyValue value(operation, mlirBlockGetArgument(block, index)); + return PyBlockArgument(value); + } + + /// Defines a Python class in the bindings. + static void bind(py::module &m) { + py::class_(m, "BlockArgumentList") + .def("__len__", &PyBlockArgumentList::dunderLen) + .def("__getitem__", &PyBlockArgumentList::dunderGetItem); + } + +private: + PyOperationRef operation; + MlirBlock block; +}; + +/// A list of operation results. Internally, these are stored as consecutive +/// elements, random access is cheap. The result list is associated with the +/// operation whose results these are, and extends the lifetime of this +/// operation. +class PyOpResultList { +public: + PyOpResultList(PyOperationRef operation) : operation(operation) {} + + /// Returns the length of the result list. + intptr_t dunderLen() { + operation->checkValid(); + return mlirOperationGetNumResults(operation->get()); + } + + /// Returns `index`-th element in the result list. 
+ PyOpResult dunderGetItem(intptr_t index) { + if (index < 0 || index >= dunderLen()) { + throw SetPyError(PyExc_IndexError, + "attempt to access out of bounds region"); + } + PyValue value(operation, mlirOperationGetResult(operation->get(), index)); + return PyOpResult(value); + } + + /// Defines a Python class in the bindings. + static void bind(py::module &m) { + py::class_(m, "OpResultList") + .def("__len__", &PyOpResultList::dunderLen) + .def("__getitem__", &PyOpResultList::dunderGetItem); + } + +private: + PyOperationRef operation; +}; + +} // end namespace + //------------------------------------------------------------------------------ // Standard attribute subclasses. //------------------------------------------------------------------------------ @@ -1793,6 +1963,10 @@ void mlir::python::populateIRSubmodule(py::module &m) { .def_property_readonly( "regions", [](PyOperation &self) { return PyRegionList(self.getRef()); }) + .def_property_readonly( + "results", + [](PyOperation &self) { return PyOpResultList(self.getRef()); }, + "Returns the list of Operation results.") .def("__iter__", [](PyOperation &self) { return PyRegionIterator(self.getRef()); }) .def( @@ -1833,6 +2007,12 @@ void mlir::python::populateIRSubmodule(py::module &m) { // Mapping of PyBlock. py::class_(m, "Block") + .def_property_readonly( + "arguments", + [](PyBlock &self) { + return PyBlockArgumentList(self.getParentOperation(), self.get()); + }, + "Returns a list of block arguments.") .def_property_readonly( "operations", [](PyBlock &self) { @@ -2015,11 +2195,40 @@ void mlir::python::populateIRSubmodule(py::module &m) { PyTupleType::bind(m); PyFunctionType::bind(m); + // Mapping of Value. 
+ py::class_(m, "Value") + .def_property_readonly( + "context", + [](PyValue &self) { return self.getParentOperation()->getContext(); }, + "Context in which the value lives.") + .def( + "dump", [](PyValue &self) { mlirValueDump(self.get()); }, + kDumpDocstring) + .def( + "__str__", + [](PyValue &self) { + PyPrintAccumulator printAccum; + printAccum.parts.append("Value("); + mlirValuePrint(self.get(), printAccum.getCallback(), + printAccum.getUserData()); + printAccum.parts.append(")"); + return printAccum.join(); + }, + kValueDunderStrDocstring) + .def_property_readonly("type", [](PyValue &self) { + return PyType(self.getParentOperation()->getContext(), + mlirValueGetType(self.get())); + }); + PyBlockArgument::bind(m); + PyOpResult::bind(m); + // Container bindings. + PyBlockArgumentList::bind(m); PyBlockIterator::bind(m); PyBlockList::bind(m); PyOperationIterator::bind(m); PyOperationList::bind(m); + PyOpResultList::bind(m); PyRegionIterator::bind(m); PyRegionList::bind(m); } diff --git a/mlir/lib/Bindings/Python/IRModules.h b/mlir/lib/Bindings/Python/IRModules.h index c175018c8bb65..947b7343e35a3 100644 --- a/mlir/lib/Bindings/Python/IRModules.h +++ b/mlir/lib/Bindings/Python/IRModules.h @@ -23,6 +23,7 @@ class PyMlirContext; class PyModule; class PyOperation; class PyType; +class PyValue; /// Template for a reference to a concrete type which captures a python /// reference to its underlying python object. @@ -381,6 +382,27 @@ class PyType : public BaseContextObject { MlirType type; }; +/// Wrapper around the generic MlirValue. +/// Values are managed completely by the operation that resulted in their +/// definition. For op result value, this is the operation that defines the +/// value. For block argument values, this is the operation that contains the +/// block to which the value is an argument (blocks cannot be detached in Python +/// bindings so such operation always exists). 
+class PyValue { +public: + PyValue(PyOperationRef parentOperation, MlirValue value) + : parentOperation(parentOperation), value(value) {} + + MlirValue get() { return value; } + PyOperationRef &getParentOperation() { return parentOperation; } + + void checkValid() { return parentOperation->checkValid(); } + +private: + PyOperationRef parentOperation; + MlirValue value; +}; + void populateIRSubmodule(pybind11::module &m); } // namespace python diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp index 4bae43c424fdd..104f6fda5c020 100644 --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -454,6 +454,8 @@ MlirType mlirValueGetType(MlirValue value) { return wrap(unwrap(value).getType()); } +void mlirValueDump(MlirValue value) { unwrap(value).dump(); } + void mlirValuePrint(MlirValue value, MlirStringCallback callback, void *userData) { detail::CallbackOstream stream(callback, userData); diff --git a/mlir/test/Bindings/Python/ir_operation.py b/mlir/test/Bindings/Python/ir_operation.py index 37b8305585283..e4dc71ac26efb 100644 --- a/mlir/test/Bindings/Python/ir_operation.py +++ b/mlir/test/Bindings/Python/ir_operation.py @@ -102,6 +102,35 @@ def walk_operations(indent, op): run(testTraverseOpRegionBlockIndices) +# CHECK-LABEL: TEST: testBlockArgumentList +def testBlockArgumentList(): + ctx = mlir.ir.Context() + module = ctx.parse_module(r""" + func @f1(%arg0: i32, %arg1: f64, %arg2: index) { + return + } + """) + func = module.operation.regions[0].blocks[0].operations[0] + entry_block = func.regions[0].blocks[0] + assert len(entry_block.arguments) == 3 + # CHECK: Argument 0, type i32 + # CHECK: Argument 1, type f64 + # CHECK: Argument 2, type index + for arg in entry_block.arguments: + print(f"Argument {arg.arg_number}, type {arg.type}") + new_type = mlir.ir.IntegerType.get_signless(ctx, 8 * (arg.arg_number + 1)) + arg.set_type(new_type) + + # CHECK: Argument 0, type i8 + # CHECK: Argument 1, type i16 + # CHECK: Argument 2, type i24 + for arg in 
entry_block.arguments: + print(f"Argument {arg.arg_number}, type {arg.type}") + + +run(testBlockArgumentList) + + # CHECK-LABEL: TEST: testDetachedOperation def testDetachedOperation(): ctx = mlir.ir.Context() @@ -196,3 +225,26 @@ def testOperationWithRegion(): print(module) run(testOperationWithRegion) + + +# CHECK-LABEL: TEST: testOperationResultList +def testOperationResultList(): + ctx = mlir.ir.Context() + module = ctx.parse_module(r""" + func @f1() { + %0:3 = call @f2() : () -> (i32, f64, index) + return + } + func @f2() -> (i32, f64, index) + """) + caller = module.operation.regions[0].blocks[0].operations[0] + call = caller.regions[0].blocks[0].operations[0] + assert len(call.results) == 3 + # CHECK: Result 0, type i32 + # CHECK: Result 1, type f64 + # CHECK: Result 2, type index + for res in call.results: + print(f"Result {res.result_number}, type {res.type}") + + +run(testOperationResultList) From 31782cd2199b808cda6733d6448e1314e2c8ec96 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 20 Oct 2020 11:22:02 +0200 Subject: [PATCH 016/179] [mlir] Use the correct base class for Attributes in Python bindings The pybind class typedef for concrete attribute classes was erroneously deriving all of them from PyAttribute instead of the provided base class. This has not been triggering any error because only one level of the hierarchy is currently exposed. 
Differential Revision: https://reviews.llvm.org/D89779
---
 mlir/lib/Bindings/Python/IRModules.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp
index 0c3e541d18b29..b0b24837276cf 100644
--- a/mlir/lib/Bindings/Python/IRModules.cpp
+++ b/mlir/lib/Bindings/Python/IRModules.cpp
@@ -919,7 +919,7 @@ class PyConcreteAttribute : public BaseTy {
   // Derived classes must define statics for:
   //   IsAFunctionTy isaFunction
   //   const char *pyClassName
-  using ClassTy = py::class_;
+  using ClassTy = py::class_;
   using IsAFunctionTy = int (*)(MlirAttribute);
 
   PyConcreteAttribute() = default;

From f6f27115e622f2cc6b20579e9d7d84c2304091a1 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Tue, 20 Oct 2020 11:22:29 +0200
Subject: [PATCH 017/179] [mlir] Fix copy-pasted docstrings in Python bindings

Docstrings for the `__str__` method in many classes were recycling the
constant string defined for `Type`, even though those classes are not
types themselves. Use proper docstrings instead. Since they are succinct,
use string literals instead of top-level constants to avoid further
mistakes.

Differential Revision: https://reviews.llvm.org/D89780
---
 mlir/lib/Bindings/Python/IRModules.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp
index b0b24837276cf..db8a220c9d31d 100644
--- a/mlir/lib/Bindings/Python/IRModules.cpp
+++ b/mlir/lib/Bindings/Python/IRModules.cpp
@@ -72,9 +72,6 @@ use the dedicated print method, which supports keyword arguments to customize
 behavior.
)"; -static const char kTypeStrDunderDocstring[] = - R"(Prints the assembly form of the type.)"; - static const char kDumpDocstring[] = R"(Dumps a debug representation of the object to stderr.)"; @@ -1978,7 +1975,7 @@ void mlir::python::populateIRSubmodule(py::module &m) { printAccum.getUserData()); return printAccum.join(); }, - kTypeStrDunderDocstring); + "Returns the assembly form of the operation."); // Mapping of PyRegion. py::class_(m, "Region") @@ -2047,9 +2044,9 @@ void mlir::python::populateIRSubmodule(py::module &m) { printAccum.getUserData()); return printAccum.join(); }, - kTypeStrDunderDocstring); + "Returns the assembly form of the block."); - // Mapping of Type. + // Mapping of PyAttribute. py::class_(m, "Attribute") .def_property_readonly( "context", @@ -2086,7 +2083,7 @@ void mlir::python::populateIRSubmodule(py::module &m) { printAccum.getUserData()); return printAccum.join(); }, - kTypeStrDunderDocstring) + "Returns the assembly form of the Attribute.") .def("__repr__", [](PyAttribute &self) { // Generally, assembly formats are not printed for __repr__ because // this can cause exceptionally long debug output and exceptions. @@ -2139,7 +2136,7 @@ void mlir::python::populateIRSubmodule(py::module &m) { PyStringAttribute::bind(m); PyDenseElementsAttribute::bind(m); - // Mapping of Type. + // Mapping of PyType. py::class_(m, "Type") .def_property_readonly( "context", [](PyType &self) { return self.getContext().getObject(); }, @@ -2163,7 +2160,7 @@ void mlir::python::populateIRSubmodule(py::module &m) { printAccum.getUserData()); return printAccum.join(); }, - kTypeStrDunderDocstring) + "Returns the assembly form of the type.") .def("__repr__", [](PyType &self) { // Generally, assembly formats are not printed for __repr__ because // this can cause exceptionally long debug output and exceptions. 
From e32036b9732fd1ec4794fef48ec79a268aa8cbce Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Wed, 21 Oct 2020 15:57:43 +0800 Subject: [PATCH 018/179] [X86] Add clang release notes for HRESET and minor change for llvm release notes. (NFC) --- clang/docs/ReleaseNotes.rst | 2 ++ llvm/docs/ReleaseNotes.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 540cf91326056..0e2915ccf73a4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -200,6 +200,8 @@ X86 Support in Clang implies -mtune=. -mtune=generic is the default with no -march or -mtune specified. +- Support for feature ``HRESET`` has been added. + Internal API Changes -------------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index d85f4bab210f3..d457e3fd87de7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -118,7 +118,7 @@ During this release ... the "target-cpu" attribute or TargetMachine CPU which will be used to select Instruction Set. If the attribute is not present, the tune CPU will follow the target CPU. -* Support for ISA HRESET has been added. +* Support for feature ``HRESET`` has been added. 
Changes to the AMDGPU Target ----------------------------- From 60913ebcbcb84d1757a4596816637b3861c1cc08 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 21 Oct 2020 01:33:21 -0700 Subject: [PATCH 019/179] [NFC][LSAN] Use InitializeCommonFlags in LSAN --- compiler-rt/lib/lsan/lsan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp index c8cc045783d45..2c0a3bf0787c2 100644 --- a/compiler-rt/lib/lsan/lsan.cpp +++ b/compiler-rt/lib/lsan/lsan.cpp @@ -77,7 +77,7 @@ static void InitializeFlags() { parser.ParseString(lsan_default_options); parser.ParseStringFromEnv("LSAN_OPTIONS"); - SetVerbosity(common_flags()->verbosity); + InitializeCommonFlags(); if (Verbosity()) ReportUnrecognizedFlags(); From 58f6b16c4981f91c49f6878ac342562a7a29d385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Marques?= Date: Wed, 21 Oct 2020 09:47:25 +0100 Subject: [PATCH 020/179] [compiler-rt][builtins][RISCV] Always include __mul[sd]i3 builtin definitions The RISC-V implementations of the `__mulsi3`, `__muldi3` builtins were conditionally compiling the actual function definitions depending on whether the M extension was present or not. This caused Compiler-RT testing failures for RISC-V targets with the M extension, as when these sources were included the `librt_has_mul*i3` features were still being defined. These `librt_has_*` definitions are used to conditionally run the respective tests. Since the actual functions were not being compiled-in, the generic test for `__muldi3` would fail. This patch makes these implementations follow the normal Compiler-RT convention of always including the definition, and conditionally running the respective tests by using the lit conditional `REQUIRES: librt_has_*`. Since the `mulsi3_test.c` wasn't actually RISC-V-specific, this patch also moves it out of the `riscv` directory. It now only depends on `librt_has_mulsi3` to run. 
Differential Revision: https://reviews.llvm.org/D86457 --- compiler-rt/lib/builtins/riscv/int_mul_impl.inc | 6 ++++-- compiler-rt/test/builtins/Unit/muldi3_test.c | 2 +- .../test/builtins/Unit/{riscv => }/mulsi3_test.c | 12 ++---------- 3 files changed, 7 insertions(+), 13 deletions(-) rename compiler-rt/test/builtins/Unit/{riscv => }/mulsi3_test.c (90%) diff --git a/compiler-rt/lib/builtins/riscv/int_mul_impl.inc b/compiler-rt/lib/builtins/riscv/int_mul_impl.inc index 50951d5f4195f..53699b356f6a8 100644 --- a/compiler-rt/lib/builtins/riscv/int_mul_impl.inc +++ b/compiler-rt/lib/builtins/riscv/int_mul_impl.inc @@ -10,7 +10,10 @@ // //===----------------------------------------------------------------------===// -#if !defined(__riscv_mul) +#ifndef __mulxi3 +#error "__mulxi3 must be defined to use this generic implementation" +#endif + .text .align 2 @@ -28,4 +31,3 @@ __mulxi3: slli a2, a2, 1 bnez a1, .L1 ret -#endif diff --git a/compiler-rt/test/builtins/Unit/muldi3_test.c b/compiler-rt/test/builtins/Unit/muldi3_test.c index 74530cbfb09a6..c03570f029809 100644 --- a/compiler-rt/test/builtins/Unit/muldi3_test.c +++ b/compiler-rt/test/builtins/Unit/muldi3_test.c @@ -11,7 +11,7 @@ int test__muldi3(di_int a, di_int b, di_int expected) di_int x = __muldi3(a, b); if (x != expected) printf("error in __muldi3: %lld * %lld = %lld, expected %lld\n", - a, b, __muldi3(a, b), expected); + a, b, x, expected); return x != expected; } diff --git a/compiler-rt/test/builtins/Unit/riscv/mulsi3_test.c b/compiler-rt/test/builtins/Unit/mulsi3_test.c similarity index 90% rename from compiler-rt/test/builtins/Unit/riscv/mulsi3_test.c rename to compiler-rt/test/builtins/Unit/mulsi3_test.c index d4621acbbbb5a..0b0c14204efdf 100644 --- a/compiler-rt/test/builtins/Unit/riscv/mulsi3_test.c +++ b/compiler-rt/test/builtins/Unit/mulsi3_test.c @@ -1,13 +1,10 @@ -// REQUIRES: riscv32-target-arch // RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_mulsi3 #include "int_lib.h" 
#include #include -#if !defined(__riscv_mul) && __riscv_xlen == 32 -// Based on mulsi3_test.c - COMPILER_RT_ABI si_int __mulsi3(si_int a, si_int b); int test__mulsi3(si_int a, si_int b, si_int expected) @@ -15,14 +12,12 @@ int test__mulsi3(si_int a, si_int b, si_int expected) si_int x = __mulsi3(a, b); if (x != expected) printf("error in __mulsi3: %d * %d = %d, expected %d\n", - a, b, __mulsi3(a, b), expected); + a, b, x, expected); return x != expected; } -#endif int main() { -#if !defined(__riscv_mul) && __riscv_xlen == 32 if (test__mulsi3(0, 0, 0)) return 1; if (test__mulsi3(0, 1, 0)) @@ -99,9 +94,6 @@ int main() return 1; if (test__mulsi3(-8192, -4194303, 34359730176)) return 1; -#else - printf("skipped\n"); -#endif return 0; } From 42a82862b625279028130e62846d057425bca691 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 14 Oct 2020 08:48:29 +0200 Subject: [PATCH 021/179] Reapply "[clang] Improve handling of physical registers in inline assembly operands." Earlyclobbers are now excepted from this change (original commit: c78da03). 
Review: Ulrich Weigand, Nick Desaulniers Differential Revision: https://reviews.llvm.org/D87279 --- clang/lib/CodeGen/CGStmt.cpp | 20 +++++++++++++++++--- clang/test/CodeGen/aarch64-inline-asm.c | 6 ++++++ clang/test/CodeGen/systemz-inline-asm-02.c | 13 +++++++++++++ clang/test/CodeGen/systemz-inline-asm.c | 14 ++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/systemz-inline-asm-02.c diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 4b813be086e12..3f8f77654f6d3 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -21,6 +21,7 @@ #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -1953,7 +1954,8 @@ SimplifyConstraint(const char *Constraint, const TargetInfo &Target, static std::string AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr, const TargetInfo &Target, CodeGenModule &CGM, - const AsmStmt &Stmt, const bool EarlyClobber) { + const AsmStmt &Stmt, const bool EarlyClobber, + std::string *GCCReg = nullptr) { const DeclRefExpr *AsmDeclRef = dyn_cast(&AsmExpr); if (!AsmDeclRef) return Constraint; @@ -1978,6 +1980,8 @@ AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr, } // Canonicalize the register here before returning it. Register = Target.getNormalizedGCCRegisterName(Register); + if (GCCReg != nullptr) + *GCCReg = Register.str(); return (EarlyClobber ? "&{" : "{") + Register.str() + "}"; } @@ -2176,6 +2180,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Keep track of out constraints for tied input operand. std::vector OutputConstraints; + // Keep track of defined physregs. 
+ llvm::SmallSet PhysRegOutputs; + // An inline asm can be marked readonly if it meets the following conditions: // - it doesn't have any sideeffects // - it doesn't clobber memory @@ -2195,9 +2202,15 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { const Expr *OutExpr = S.getOutputExpr(i); OutExpr = OutExpr->IgnoreParenNoopCasts(getContext()); + std::string GCCReg; OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr, getTarget(), CGM, S, - Info.earlyClobber()); + Info.earlyClobber(), + &GCCReg); + // Give an error on multiple outputs to same physreg. + if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second) + CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg); + OutputConstraints.push_back(OutputConstraint); LValue Dest = EmitLValue(OutExpr); if (!Constraints.empty()) @@ -2284,7 +2297,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); - if (Info.allowsRegister()) + // Only tie earlyclobber physregs. 
+ if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber())) InOutConstraints += llvm::utostr(i); else InOutConstraints += OutputConstraint; diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c index 0889a7157f0b1..a6e8faef8b9e2 100644 --- a/clang/test/CodeGen/aarch64-inline-asm.c +++ b/clang/test/CodeGen/aarch64-inline-asm.c @@ -74,3 +74,9 @@ void test_gcc_registers(void) { asm volatile("mov r0, r1\n"); // CHECK: call void asm sideeffect "mov r0, r1\0A", ""() } + +void test_tied_earlyclobber(void) { + register int a asm("x1"); + asm("" : "+&r"(a)); + // CHECK: call i32 asm "", "=&{x1},0"(i32 %0) +} diff --git a/clang/test/CodeGen/systemz-inline-asm-02.c b/clang/test/CodeGen/systemz-inline-asm-02.c new file mode 100644 index 0000000000000..754d7e66f04b2 --- /dev/null +++ b/clang/test/CodeGen/systemz-inline-asm-02.c @@ -0,0 +1,13 @@ +// RUN: not %clang_cc1 -triple s390x-linux-gnu -O2 -emit-llvm -o - %s 2>&1 \ +// RUN: | FileCheck %s +// REQUIRES: systemz-registered-target + +// Test that an error is given if a physreg is defined by multiple operands. +int test_physreg_defs(void) { + register int l __asm__("r7") = 0; + + // CHECK: error: multiple outputs to hard register: r7 + __asm__("" : "+r"(l), "=r"(l)); + + return l; +} diff --git a/clang/test/CodeGen/systemz-inline-asm.c b/clang/test/CodeGen/systemz-inline-asm.c index c5497655f505f..357fd4c184e70 100644 --- a/clang/test/CodeGen/systemz-inline-asm.c +++ b/clang/test/CodeGen/systemz-inline-asm.c @@ -129,3 +129,17 @@ long double test_f128(long double f, long double g) { // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g) // CHECK: store fp128 [[RESULT]], fp128* [[DEST]] } + +// Test that there are no tied physreg uses. TwoAddress pass cannot deal with them. 
+int test_physregs(void) { + // CHECK-LABEL: define signext i32 @test_physregs() + register int l __asm__("r7") = 0; + + // CHECK: call i32 asm "lr $0, $1", "={r7},{r7}" + __asm__("lr %0, %1" : "+r"(l)); + + // CHECK: call i32 asm "$0 $1 $2", "={r7},{r7},{r7}" + __asm__("%0 %1 %2" : "+r"(l) : "r"(l)); + + return l; +} From f6a5699c6cb5df03d9e50c17fd47edab3fefd6bf Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 21 Oct 2020 09:17:28 +0100 Subject: [PATCH 022/179] [AMDGPU][TableGen] Make more use of !ne !not !and !or. NFC. --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 32 ++++----- llvm/lib/Target/AMDGPU/DSInstructions.td | 8 +-- llvm/lib/Target/AMDGPU/FLATInstructions.td | 4 +- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 20 +++--- llvm/lib/Target/AMDGPU/SIInstrFormats.td | 4 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 76 ++++++++------------- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 6 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 8 +-- 10 files changed, 74 insertions(+), 90 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index e1c9f1609a02a..763b2f0ef80e5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -183,15 +183,15 @@ class getMTBUFAsmOps { } class MTBUF_SetupAddr { - bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1, - !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + bits<1> offen = !or(!eq(addrKind, BUFAddrKind.OffEn), + !eq(addrKind, BUFAddrKind.BothEn)); - bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, - !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + bits<1> idxen = !or(!eq(addrKind, BUFAddrKind.IdxEn), + !eq(addrKind, BUFAddrKind.BothEn)); - bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0); + bits<1> addr64 = !eq(addrKind, BUFAddrKind.Addr64); - bits<1> has_vaddr = !if(!eq(addrKind, 
BUFAddrKind.Offset), 0, 1); + bits<1> has_vaddr = !ne(addrKind, BUFAddrKind.Offset); } class MTBUF_Load_Pseudo { } class MUBUF_SetupAddr { - bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1, - !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + bits<1> offen = !or(!eq(addrKind, BUFAddrKind.OffEn), + !eq(addrKind, BUFAddrKind.BothEn)); - bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, - !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + bits<1> idxen = !or(!eq(addrKind, BUFAddrKind.IdxEn), + !eq(addrKind, BUFAddrKind.BothEn)); - bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0); + bits<1> addr64 = !eq(addrKind, BUFAddrKind.Addr64); - bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1); + bits<1> has_vaddr = !ne(addrKind, BUFAddrKind.Offset); } class MUBUF_Load_Pseudo .ret; } @@ -1857,7 +1857,7 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10 op, MUBUF_Pseudo ps, int ef> : let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{16} = !if(ps.lds, 1, 0); + let Inst{16} = ps.lds; let Inst{24-18} = op; let Inst{31-26} = 0x38; let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -2208,7 +2208,7 @@ class MUBUF_Real_vi op, MUBUF_Pseudo ps> : let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{16} = !if(ps.lds, 1, 0); + let Inst{16} = ps.lds; let Inst{17} = !if(ps.has_slc, slc, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding @@ -2258,7 +2258,7 @@ class MUBUF_Real_gfx80 op, MUBUF_Pseudo ps> : let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{16} = !if(ps.lds, 1, 0); + let Inst{16} = ps.lds; let Inst{17} = !if(ps.has_slc, slc, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 96345d07c95dc..6f6cd23647030 100644 --- 
a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -166,12 +166,12 @@ class DS_1A1D_RET multiclass DS_1A1D_RET_mc { def "" : DS_1A1D_RET, - AtomicNoRet; + AtomicNoRet; let has_m0_read = 0 in { def _gfx9 : DS_1A1D_RET, AtomicNoRet; + !ne(NoRetOp, "")>; } } @@ -191,11 +191,11 @@ multiclass DS_1A2D_RET_mc { def "" : DS_1A2D_RET, - AtomicNoRet; + AtomicNoRet; let has_m0_read = 0 in { def _gfx9 : DS_1A2D_RET, - AtomicNoRet; + AtomicNoRet; } } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index e7f6b0ca3b553..29a350d5d2a3a 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -63,9 +63,9 @@ class FLAT_Pseudo op, FLAT_Pseudo ps> : diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index f56b8728e64c0..e9ee87283553e 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -148,7 +148,7 @@ class MIMG_Base let hasSideEffects = 0; // XXX ???? 
let DecoderNamespace = dns; - let isAsmParserOnly = !if(!eq(dns,""), 1, 0); + let isAsmParserOnly = !eq(dns, ""); } class MIMG @@ -308,13 +308,13 @@ multiclass MIMG_NoSampler_Src_Helper op, string asm, multiclass MIMG_NoSampler op, string asm, bit has_d16, bit mip = 0, bit isResInfo = 0> { def "" : MIMGBaseOpcode { - let Coordinates = !if(isResInfo, 0, 1); + let Coordinates = !not(isResInfo); let LodOrClampOrMip = mip; let HasD16 = has_d16; } let BaseOpcode = !cast(NAME), - mayLoad = !if(isResInfo, 0, 1) in { + mayLoad = !not(isResInfo) in { let VDataDwords = 1 in defm _V1 : MIMG_NoSampler_Src_Helper ; let VDataDwords = 2 in @@ -665,12 +665,12 @@ multiclass MIMG_Sampler op, AMDGPUSampleVariant sample, bit wqm = 0, bit isG16 = 0, bit isGetLod = 0, string asm = "image_sample"#sample.LowerCaseMod#!if(isG16, "_g16", "")> { def "" : MIMG_Sampler_BaseOpcode { - let HasD16 = !if(isGetLod, 0, 1); + let HasD16 = !not(isGetLod); let G16 = isG16; } let BaseOpcode = !cast(NAME), WQM = wqm, - mayLoad = !if(isGetLod, 0, 1) in { + mayLoad = !not(isGetLod) in { let VDataDwords = 1 in defm _V1 : MIMG_Sampler_Src_Helper; let VDataDwords = 2 in @@ -712,8 +712,8 @@ class MIMG_IntersectRay_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), - !if(!eq(A16,1), (ins GFX10A16:$a16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(!eq(A16,1), "$a16", ""); + !if(A16, (ins GFX10A16:$a16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(A16, "$a16", ""); let nsa = 0; } @@ -722,15 +722,15 @@ class MIMG_IntersectRay_nsa_gfx10 : MIMG_nsa_gfx10 { let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc), - !if(!eq(A16,1), (ins GFX10A16:$a16), (ins))); - let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(!eq(A16,1), "$a16", ""); + !if(A16, (ins GFX10A16:$a16), (ins))); + let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", ""); } multiclass MIMG_IntersectRay { def "" : MIMGBaseOpcode; let SubtargetPredicate 
= HasGFX10_BEncoding, AssemblerPredicate = HasGFX10_BEncoding, - AsmMatchConverter = !if(!eq(A16,1), "cvtIntersectRay", ""), + AsmMatchConverter = !if(A16, "cvtIntersectRay", ""), dmask = 0xf, unorm = 1, d16 = 0, diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 428c21c896d50..ca1cfc65c94a0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -203,11 +203,11 @@ class InstSI DisableVIDecoder = 0; field bits<1> DisableDecoder = 0; - let isAsmParserOnly = !if(!eq(DisableDecoder{0}, {0}), 0, 1); + let isAsmParserOnly = !ne(DisableDecoder{0}, {0}); let AsmVariantName = AMDGPUAsmVariants.Default; // Avoid changing source registers in a way that violates constant bus read limitations. - let hasExtraSrcRegAllocReq = !if(VOP1,1,!if(VOP2,1,!if(VOP3,1,!if(VOPC,1,!if(SDWA,1, !if(VALU,1,0)))))); + let hasExtraSrcRegAllocReq = !or(VOP1, VOP2, VOP3, VOPC, SDWA, VALU); } class PseudoInstSI pattern = [], string asm = ""> diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1aa4be8aa2865..86c54efe34809 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -259,31 +259,25 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", // Returns 1 if the source arguments have modifiers, 0 if they do not. // XXX - do f16 instructions? 
class isFloatType { - bit ret = - !if(!eq(SrcVT.Value, f16.Value), 1, - !if(!eq(SrcVT.Value, f32.Value), 1, - !if(!eq(SrcVT.Value, f64.Value), 1, - !if(!eq(SrcVT.Value, v2f16.Value), 1, - !if(!eq(SrcVT.Value, v4f16.Value), 1, - !if(!eq(SrcVT.Value, v2f32.Value), 1, - !if(!eq(SrcVT.Value, v2f64.Value), 1, - 0))))))); + bit ret = !or(!eq(SrcVT.Value, f16.Value), + !eq(SrcVT.Value, f32.Value), + !eq(SrcVT.Value, f64.Value), + !eq(SrcVT.Value, v2f16.Value), + !eq(SrcVT.Value, v4f16.Value), + !eq(SrcVT.Value, v2f32.Value), + !eq(SrcVT.Value, v2f64.Value)); } class isIntType { - bit ret = - !if(!eq(SrcVT.Value, i16.Value), 1, - !if(!eq(SrcVT.Value, i32.Value), 1, - !if(!eq(SrcVT.Value, i64.Value), 1, - 0))); + bit ret = !or(!eq(SrcVT.Value, i16.Value), + !eq(SrcVT.Value, i32.Value), + !eq(SrcVT.Value, i64.Value)); } class isPackedType { - bit ret = - !if(!eq(SrcVT.Value, v2i16.Value), 1, - !if(!eq(SrcVT.Value, v2f16.Value), 1, - !if(!eq(SrcVT.Value, v4f16.Value), 1, 0) - )); + bit ret = !or(!eq(SrcVT.Value, v2i16.Value), + !eq(SrcVT.Value, v2f16.Value), + !eq(SrcVT.Value, v4f16.Value)); } //===----------------------------------------------------------------------===// @@ -1393,8 +1387,8 @@ def HWREG { class getHwRegImm { int ret = !and(!or(Reg, - !or(!shl(Offset, 6), - !shl(!add(Size, -1), 11))), 65535); + !shl(Offset, 6), + !shl(!add(Size, -1), 11)), 65535); } //===----------------------------------------------------------------------===// @@ -1590,13 +1584,11 @@ class getVOP3SrcForVT { // Float or packed int class isModifierType { - bit ret = - !if(!eq(SrcVT.Value, f16.Value), 1, - !if(!eq(SrcVT.Value, f32.Value), 1, - !if(!eq(SrcVT.Value, f64.Value), 1, - !if(!eq(SrcVT.Value, v2f16.Value), 1, - !if(!eq(SrcVT.Value, v2i16.Value), 1, - 0))))); + bit ret = !or(!eq(SrcVT.Value, f16.Value), + !eq(SrcVT.Value, f32.Value), + !eq(SrcVT.Value, f64.Value), + !eq(SrcVT.Value, v2f16.Value), + !eq(SrcVT.Value, v2i16.Value)); } // Return type of input modifiers operand for 
specified input operand @@ -2114,14 +2106,6 @@ class getHasDPP .ret); } -class BitOr { - bit ret = !if(a, 1, !if(b, 1, 0)); -} - -class BitAnd { - bit ret = !if(a, !if(b, 1, 0), 0); -} - def PatGenMode { int NoPattern = 0; int Pattern = 1; @@ -2159,18 +2143,18 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field Operand Src1ModSDWA = getSrcModSDWA.ret; - field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); + field bit HasDst = !ne(DstVT.Value, untyped.Value); field bit HasDst32 = HasDst; field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case field int NumSrcArgs = getNumSrcArgs.ret; - field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1); - field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1); - field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1); + field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value); + field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value); + field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value); // TODO: Modifiers logic is somewhat adhoc here, to be refined later // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which // enables modifiers for i32 type. - field bit HasModifiers = BitOr.ret, EnableF32SrcMods>.ret; + field bit HasModifiers = !or(isModifierType.ret, EnableF32SrcMods); // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods. 
field bit HasSrc0FloatMods = isFloatType.ret; @@ -2183,15 +2167,15 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field bit HasSrc2IntMods = isIntType.ret; field bit HasSrc0Mods = HasModifiers; - field bit HasSrc1Mods = !if(HasModifiers, BitOr.ret, 0); - field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0); + field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0); + field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); - field bit HasClamp = BitOr.ret, EnableClamp>.ret; + field bit HasClamp = !or(isModifierType.ret, EnableClamp); field bit HasSDWAClamp = EmitDst; - field bit HasFPClamp = BitAnd.ret, HasClamp>.ret; + field bit HasFPClamp = !and(isFloatType.ret, HasClamp); field bit HasIntClamp = !if(isFloatType.ret, 0, HasClamp); field bit HasClampLo = HasClamp; - field bit HasClampHi = BitAnd.ret, HasClamp>.ret; + field bit HasClampHi = !and(isPackedType.ret, HasClamp); field bit HasHigh = 0; field bit IsPacked = isPackedType.ret; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index ff1f5c4bc49b1..4bbd39c1a8e5e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -149,7 +149,7 @@ multiclass SIRegLoHi16 regIdx, bit ArtificialHigh = 1, !cast(NAME#"_HI16")]> { let Namespace = "AMDGPU"; let SubRegIndices = [lo16, hi16]; - let CoveredBySubRegs = !if(ArtificialHigh,0,1); + let CoveredBySubRegs = !not(ArtificialHigh); let HWEncoding = regIdx; let HWEncoding{8} = HWEncodingHigh; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8fea9403cc421..2a0b7466c7f2c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -138,7 +138,7 @@ class VOP3Inst : VOPProf let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); - let HasModifiers = !if(Features.IsPacked, 
!if(Features.IsMAI, 0, 1), P.HasModifiers); + let HasModifiers = !if(Features.IsPacked, !not(Features.IsMAI), P.HasModifiers); // FIXME: Hack to stop printing _e64 let Outs64 = (outs DstRC.RegClass:$vdst); @@ -277,7 +277,7 @@ class getInterp16Ins ArgVT> : VOPProfile { - let HasOMod = !if(!eq(DstVT.Value, f16.Value), 0, 1); + let HasOMod = !ne(DstVT.Value, f16.Value); let HasHigh = 1; let Outs64 = (outs VGPR_32:$vdst); diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 393fc8b09d446..159731707eabd 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -226,7 +226,7 @@ foreach Type = ["I", "U"] in foreach Index = 0-3 in { // Defines patterns that extract each Index'ed 8bit from an unsigned // 32bit scalar value; - def Type#Index#"_8bit" : Extract; + def Type#Index#"_8bit" : Extract; // Defines multiplication patterns where the multiplication is happening on each // Index'ed 8bit of a 32bit scalar value. @@ -254,7 +254,7 @@ foreach Type = ["I", "U"] in foreach Index = 0-7 in { // Defines patterns that extract each Index'ed 4bit from an unsigned // 32bit scalar value; - def Type#Index#"_4bit" : Extract; + def Type#Index#"_4bit" : Extract; // Defines multiplication patterns where the multiplication is happening on each // Index'ed 8bit of a 32bit scalar value. 
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index b27a1d31863df..8df1886022182 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -69,7 +69,7 @@ class VOP3Common pattern = [], let AsmMatchConverter = !if(isVOP3P, "cvtVOP3P", - !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)), + !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), "cvtVOP3", "")); } @@ -626,7 +626,7 @@ class VOP_DPP_Pseudo pattern=[]> : string Mnemonic = OpName; string AsmOperands = P.AsmDPP; - let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", ""); + let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", ""); let SubtargetPredicate = HasDPP; let AssemblerPredicate = HasDPP; let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP, @@ -681,7 +681,7 @@ class VOP_DPP Date: Wed, 21 Oct 2020 10:02:04 +0100 Subject: [PATCH 023/179] [InstructionSimplify] Precommit more tests for D89317. NFC. --- llvm/test/Transforms/InstSimplify/compare.ll | 143 ++++++++++++++++++- 1 file changed, 140 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index a029d0b3c60db..e0817f17df401 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -1765,7 +1765,7 @@ define i1 @cmp_through_addrspacecast(i32 addrspace(1)* %p1) { ; Test simplifications for: icmp (X+Y), (X+Z) -> icmp Y,Z ; Test the overflow check when the RHS has NSW set and constant Z is greater -; or equal than Y, then we know X+Y also can't overflow. +; than Y, then we know X+Y also can't overflow. 
define i1 @icmp_nsw_1(i32 %V) { ; CHECK-LABEL: @icmp_nsw_1( @@ -1797,6 +1797,130 @@ define i1 @icmp_nsw_2(i32 %V) { ret i1 %cmp } +define i1 @icmp_nsw_22(i32 %V) { +; CHECK-LABEL: @icmp_nsw_22( +; CHECK-NEXT: ret i1 true +; + %add5 = add nsw i32 %V, 5 + %add6 = add nsw i32 %V, 6 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_23(i32 %V) { +; CHECK-LABEL: @icmp_nsw_23( +; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[V:%.*]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[V]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add nsw i32 %V, 5 + %add6 = add i32 %V, 6 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_false(i32 %V) { +; CHECK-LABEL: @icmp_nsw_false( +; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[V:%.*]], 6 +; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[V]], 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add nsw i32 %V, 6 + %add6 = add i32 %V, 5 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_false_2(i32 %V) { +; CHECK-LABEL: @icmp_nsw_false_2( +; CHECK-NEXT: ret i1 false +; + %add5 = add nsw i32 %V, 6 + %add6 = add nsw i32 %V, 5 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_false_3(i32 %V) { +; CHECK-LABEL: @icmp_nsw_false_3( +; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[V:%.*]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[V]], 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add nsw i32 %V, 5 + %add6 = add i32 %V, 5 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_false_4(i32 %V) { +; CHECK-LABEL: @icmp_nsw_false_4( +; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[V:%.*]], 6 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[V]], 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i32 %V, 6 + %add6 = add nsw i32 %V, 5 + %cmp = icmp 
slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_i8(i8 %V) { +; CHECK-LABEL: @icmp_nsw_i8( +; CHECK-NEXT: [[ADD5:%.*]] = add i8 [[V:%.*]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i8 [[V]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i8 %V, 5 + %add6 = add nsw i8 %V, 6 + %cmp = icmp slt i8 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_i16(i16 %V) { +; CHECK-LABEL: @icmp_nsw_i16( +; CHECK-NEXT: [[ADD5:%.*]] = add i16 [[V:%.*]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i16 [[V]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i16 %V, 5 + %add6 = add nsw i16 %V, 6 + %cmp = icmp slt i16 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_i64(i64 %V) { +; CHECK-LABEL: @icmp_nsw_i64( +; CHECK-NEXT: [[ADD5:%.*]] = add i64 [[V:%.*]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i64 [[V]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i64 %V, 5 + %add6 = add nsw i64 %V, 6 + %cmp = icmp slt i64 %add5, %add6 + ret i1 %cmp +} + +define <4 x i1> @icmp_nsw_vec(<4 x i32> %V) { +; CHECK-LABEL: @icmp_nsw_vec( +; CHECK-NEXT: [[ADD5:%.*]] = add <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[ADD6:%.*]] = add nsw <4 x i32> [[V]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i32> [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %add5 = add <4 x i32> %V, + %add6 = add nsw <4 x i32> %V, + %cmp = icmp slt <4 x i32> %add5, %add6 + ret <4 x i1> %cmp +} + define i1 @icmp_nsw_3(i32 %V) { ; CHECK-LABEL: @icmp_nsw_3( ; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[V:%.*]], 5 @@ -1878,14 +2002,27 @@ define i1 @icmp_nsw_9(i32 %V1, i32 %V2) { define i1 @icmp_nsw_10(i32 %V) { ; CHECK-LABEL: @icmp_nsw_10( ; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[V:%.*]], 5 -; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[V]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[V]], 6 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ADD6]], 
[[ADD5]] ; CHECK-NEXT: ret i1 [[CMP]] ; %add5 = add i32 %V, 5 - %add6 = add nsw i32 %V, 5 + %add6 = add nsw i32 %V, 6 %cmp = icmp sgt i32 %add6, %add5 ret i1 %cmp } +define i1 @icmp_nsw_11(i32 %V) { +; CHECK-LABEL: @icmp_nsw_11( +; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[V:%.*]], -125 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[V]], -99 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i32 %V, -125 + %add6 = add nsw i32 %V, -99 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + attributes #0 = { null_pointer_is_valid } From 88241ffb5636ebc0579d3ab8eeec78446a769c54 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 21 Oct 2020 10:21:50 +0100 Subject: [PATCH 024/179] [Passes] Move ADCE before DSE & LICM. The adjustment seems to have very little impact on optimizations. The only binary change with -O3 MultiSource/SPEC2000/SPEC2006 on X86 is in consumer-typeset and the size there actually decreases by -0.1%, with not significant changes in the stats. On its own, it is mildly positive in terms of compile-time, most likely due to LICM & DSE having to process slightly less instructions. It should also be unlikely that DSE/LICM make much new code dead. 
http://llvm-compile-time-tracker.com/compare.php?from=df63eedef64d715ce1f31843f7de9c11fe1e597f&to=e3bdfcf94a9eeae6e006d010464f0c1b3550577d&stat=instructions With DSE & MemorySSA, it gives some nice compile-time improvements, due to the fact that DSE can re-use the PDT from ADCE, if it does not make any changes: http://llvm-compile-time-tracker.com/compare.php?from=15fdd6cd7c24c745df1bb419e72ff66fd138aa7e&to=481f494515fc89cb7caea8d862e40f2c910dc994&stat=instructions Reviewed By: xbolva00 Differential Revision: https://reviews.llvm.org/D87322 --- llvm/lib/Passes/PassBuilder.cpp | 10 ++++++---- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 7 +++++-- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll | 10 ++++------ llvm/test/Other/new-pm-defaults.ll | 5 ++--- llvm/test/Other/new-pm-thinlto-defaults.ll | 5 ++--- .../test/Other/new-pm-thinlto-postlink-pgo-defaults.ll | 2 +- .../new-pm-thinlto-postlink-samplepgo-defaults.ll | 2 +- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll | 2 +- .../Other/new-pm-thinlto-prelink-samplepgo-defaults.ll | 2 +- llvm/test/Other/opt-O2-pipeline.ll | 5 ++--- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll | 5 ++--- llvm/test/Other/opt-O3-pipeline.ll | 5 ++--- llvm/test/Other/opt-Os-pipeline.ll | 5 ++--- 13 files changed, 31 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 6db7f3c88dd97..923c1bc11a981 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -750,6 +750,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // redo DCE, etc. FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); + + // Finally, do an expensive DCE pass to catch all the dead code exposed by + // the simplifications and basic cleanup after all the simplifications. + // TODO: Investigate if this is too expensive. 
+ FPM.addPass(ADCEPass()); + FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), @@ -761,10 +767,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); - // Finally, do an expensive DCE pass to catch all the dead code exposed by - // the simplifications and basic cleanup after all the simplifications. - // TODO: Investigate if this is too expensive. - FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 088f1e25f3d15..93c548a526f09 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -482,6 +482,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (OptLevel > 1) { MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); + } + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + + // TODO: Investigate if this is too expensive at O1. + if (OptLevel > 1) { MPM.add(createDeadStoreEliminationPass()); // Delete dead stores MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); } @@ -491,8 +496,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (RerollLoops) MPM.add(createLoopRerollPass()); - // TODO: Investigate if this is too expensive at O1. - MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. 
MPM.add(createInstructionCombiningPass()); diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index 2c157cb801863..e99428689f501 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -513,9 +513,10 @@ ; GCN-O2-NEXT: Lazy Value Information Analysis ; GCN-O2-NEXT: Jump Threading ; GCN-O2-NEXT: Value Propagation +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Aggressive Dead Code Elimination ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Dead Store Elimination ; GCN-O2-NEXT: Natural Loop Information @@ -528,8 +529,6 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Loop Pass Manager ; GCN-O2-NEXT: Loop Invariant Code Motion -; GCN-O2-NEXT: Post-Dominator Tree Construction -; GCN-O2-NEXT: Aggressive Dead Code Elimination ; GCN-O2-NEXT: Simplify the CFG ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) @@ -876,9 +875,10 @@ ; GCN-O3-NEXT: Lazy Value Information Analysis ; GCN-O3-NEXT: Jump Threading ; GCN-O3-NEXT: Value Propagation +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Aggressive Dead Code Elimination ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Dead Store Elimination ; GCN-O3-NEXT: Natural Loop Information @@ -891,8 +891,6 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Loop Pass Manager ; GCN-O3-NEXT: Loop Invariant Code Motion -; GCN-O3-NEXT: Post-Dominator Tree Construction -; GCN-O3-NEXT: Aggressive Dead Code Elimination ; GCN-O3-NEXT: Simplify the CFG ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) diff --git 
a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 2576f939c28ea..af1537581713c 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -202,16 +202,15 @@ ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: ADCEPass +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run. ; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index bc1e6577f73a6..ad16091280cfc 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -175,15 +175,14 @@ ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: ADCEPass +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass 
; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop at depth 1 containing: %loop -; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 0e287cc156b8b..84a51d8daa096 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -148,13 +148,13 @@ ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass -; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. 
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index da63b9580d767..4616fc39d15c4 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -156,13 +156,13 @@ ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass -; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index 11bd207781d86..ffcc6ecc789ad 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -198,13 +198,13 @@ ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass -; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: 
Running pass: InstCombinePass ; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 19a1fd551bf17..1bb1bc0c4aa1d 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -154,13 +154,13 @@ ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LICMPass -; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass on foo diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 4d944208df7a8..e5f43715530da 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -160,9 +160,10 @@ ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information @@ -175,8 +176,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: 
Post-Dominator Tree Construction -; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index 5c3f2a57e7e4b..1f3d763be601e 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -165,9 +165,10 @@ ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information @@ -180,8 +181,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index d910ad52ade6b..99f401973699b 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -165,9 +165,10 @@ ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information @@ 
-180,8 +181,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index d89f9071470d4..a38062ac5a5e5 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -146,9 +146,10 @@ ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information @@ -161,8 +162,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Aggressive Dead Code Elimination ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) From 6ec3872845dbb4d98a9e21ba43428ba2c023209b Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Fri, 16 Oct 2020 11:40:34 +0200 Subject: [PATCH 025/179] [mlir] ODS: support TableGen dag objects to specify OpBuilder parameters Historically, custom builder specification in OpBuilder has been accepting the formal parameter list for the builder method as a raw string containing C++. While this worked well to connect the signature and the body, this became problematic when ODS needs to manipulate the parameter list, e.g. 
to inject OpBuilder or to trim default values when generating the definition. This has also become inconsistent with other method declarations, in particular in interface definitions. Introduce the possibility to define OpBuilder formal parameters using a TableGen dag similarly to other methods. Additionally, introduce a mechanism to declare parameters with default values using an additional class. This mechanism can be reused in other methods. The string-based builder signature declaration is deprecated and will be removed after a transition period. Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D89470 --- mlir/docs/OpDefinitions.md | 91 ++++++++++++---- mlir/include/mlir/IR/OpBase.td | 58 ++++++++--- mlir/test/mlir-tblgen/op-decl.td | 8 +- mlir/test/mlir-tblgen/op-error.td | 36 +++++++ mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 110 ++++++++++++++++---- 5 files changed, 247 insertions(+), 56 deletions(-) create mode 100644 mlir/test/mlir-tblgen/op-error.td diff --git a/mlir/docs/OpDefinitions.md b/mlir/docs/OpDefinitions.md index 96c257cef08cf..f44765204dc08 100644 --- a/mlir/docs/OpDefinitions.md +++ b/mlir/docs/OpDefinitions.md @@ -567,51 +567,104 @@ complete list. #### Custom builder methods However, if the above cases cannot satisfy all needs, you can define additional -convenience build methods with `OpBuilder`. +convenience build methods in the `builders` field as follows. -`OpBuilder` is a class that takes the parameter list and the optional `build()` -method body. They are separated because we need to generate op declaration and -definition into separate files. The parameter list should not include `OpBuilder -&builder, OperationState &state` as they will be inserted automatically and the -placeholders `$_builder` and `$_state` used. For legacy/to be deprecated reason -if the `OpBuilder` parameter starts with `OpBuilder` param, then the parameter -is used. 
If the `body` is not provided, only the builder declaration will be -generated; this provides a way to define complicated builders entirely in C++ -files. +```tablegen +def MyOp : Op<"my_op", []> { + let arguments = (ins F32Attr:$attr); + + let builders = [ + OpBuilderDAG<(ins "float":$val)> + ]; +} +``` + +The `builders` field is a list of custom builders that are added to the Op +class. In this example, we provide a convenience builder that takes a floating +point value instead of an attribute. The `ins` prefix is common to many function +declarations in ODS, which use a TableGen [`dag`](#tablegen-syntax). What +follows is a comma-separated list of types (quoted string) and names prefixed +with the `$` sign. This will generate the declaration of a builder method that +looks like: + +```c++ +class MyOp : /*...*/ { + /*...*/ + static void build(::mlir::OpBuilder &builder, ::mlir::OperationState &state, + float val); +}; +``` -For example, for the following op: +Note that the method has two additional leading arguments. These arguments are +useful to construct the operation. In particular, the method must populate +`state` with attributes, operands, regions and result types of the operation to +be constructed. `builder` can be used to construct any IR objects that belong to +the Op, such as types or nested operations. Since the type and name are +generated as is in the C++ code, they should be valid C++ constructs for a type +(in the namespace of the Op) and an identifier (e.g., `class` is not a valid +identifier). + +Implementations of the builder can be provided directly in ODS, using TableGen +code block as follows. 
```tablegen def MyOp : Op<"my_op", []> { let arguments = (ins F32Attr:$attr); - let results = (outs); + let builders = [ + OpBuilderDAG<(ins "float":$val), [{ + $_state.addAttribute("attr", $_builder.getF32FloatAttr(val)); + }]> + ]; } ``` -If we want to define a builder with a default value for the only attribute, we -can add into `MyOp`: +The equivalents of `builder` and `state` arguments are available as `$_builder` +and `$_state` special variables. The named arguments listed in the `ins` part +are available directly, e.g. `val`. The body of the builder will be generated by +substituting special variables and should otherwise be valid C++. While there is +no limitation on the code size, we encourage one to define only short builders +inline in ODS and put definitions of longer builders in C++ files. + +Finally, if some arguments need a default value, they can be defined using +`CArg` to wrap the type and this value as follows. ```tablegen -def MyOp : ... { - ... +def MyOp : Op<"my_op", []> { + let arguments = (ins F32Attr:$attr); let builders = [ - OpBuilder<"float val = 0.5f", [{ + OpBuilderDAG<(ins CArg<"float", "0.5f">:$val), [{ $_state.addAttribute("attr", $_builder.getF32FloatAttr(val)); }]> ]; } ``` -The generated builder will look like: +The generated code will use default value in the declaration, but not in the +definition, as required by C++. ```c++ -static void build(OpBuilder &builder, OperationState &state, float val = 0.5f) { +/* Header file. */ +class MyOp : /*...*/ { + /*...*/ + static void build(::mlir::OpBuilder &builder, ::mlir::OperationState &state, + float val = 0.5f); +}; + +/* Source file. */ +MyOp::build(::mlir::OpBuilder &builder, ::mlir::OperationState &state, + float val) { state.addAttribute("attr", builder.getF32FloatAttr(val)); } ``` +**Deprecated:** `OpBuilder` class allows one to specify the custom builder +signature as a raw string, without separating parameters into different `dag` +arguments. 
It also supports leading parameters of `OpBuilder &` and +`OperationState &` types, which will be used instead of the autogenerated ones +if present. + ### Custom parser and printer methods Functions to parse and print the operation's custom assembly form. diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index e09c18c0e1a14..82ef2dffca065 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1804,6 +1804,13 @@ def NoRegionArguments : NativeOpTrait<"NoRegionArguments">; // Marker used to identify the argument list for an op or interface method. def ins; +// This class represents a typed argument with optional default value for C +// function signatures, e.g. builders or methods. +class CArg { + string type = ty; + string defaultValue = value; +} + // OpInterfaceTrait corresponds to a specific 'OpInterface' class defined in // C++. The purpose to wrap around C++ symbol string with this class is to make // interfaces specified for ops in TableGen less alien and more integrated. @@ -1923,6 +1930,15 @@ def region; // Marker used to identify the successor list for an op. def successor; +// Base class for custom builders. This is a transient class that will go away +// when the transition to the DAG form of builder declaration is complete. +// Should not be used directly. +class OpBuilderBase { + string params = ?; + dag dagParams = dp; + code body = b; +} + // Class for defining a custom builder. // // TableGen generates several generic builders for each op by default (see @@ -1932,22 +1948,40 @@ def successor; // The signature of the builder is always // // ```c++ -// static void build(OpBuilder &builder, OperationState &state, +// static void build(::mlir::OpBuilder &builder, ::mlir::OperationState &state, // ...) { // ... 
// } // ``` // -// To define a custom builder, the parameter list (*excluding* the `Builder -// *builder, OperationState &state` part) and body should be passed in -// as separate template arguments to this class. This is because we generate -// op declaration and definition into separate files. If an empty string is -// passed in for `body`, then *only* the builder declaration will be -// generated; this provides a way to define complicated builders entirely -// in C++. -class OpBuilder { - string params = p; - code body = b; +// To define a custom builder, the parameter list (*excluding* the +// `OpBuilder &builder, OperationState &state` part) and body should be passed +// in as separate template arguments to this class. The parameter list is a +// TableGen DAG with `ins` operation with named arguments, which has either: +// - string initializers ("Type":$name) to represent a typed parameter, or +// - CArg-typed initializers (CArg<"Type", "default">:$name) to represent a +// typed parameter that may have a default value. +// The type string is used verbatim to produce code and, therefore, must be a +// valid C++ type. It is used inside the C++ namespace of the parent Op's +// dialect; explicit namespace qualification like `::mlir` may be necessary if +// Ops are not placed inside the `mlir` namespace. The default value string is +// used verbatim to produce code and must be a valid C++ initializer the given +// type. For example, the following signature specification +// +// ``` +// OpBuilderDAG<(ins "int":$integerArg, CArg<"float", "3.0f">:$floatArg)> +// ``` +// +// has an integer parameter and a float parameter with a default value. +// +// If an empty string is passed in for `body`, then *only* the builder +// declaration will be generated; this provides a way to define complicated +// builders entirely in C++. +class OpBuilderDAG : OpBuilderBase; + +// Deprecated version of OpBuilder that takes the builder signature as string. 
+class OpBuilder : OpBuilderBase<(ins), b> { + let params = p; } // A base decorator class that may optionally be added to OpVariables. @@ -2025,7 +2059,7 @@ class Op props = []> { // ValueRange operands, // ArrayRef attributes); // ``` - list builders = ?; + list builders = ?; // Avoid generating default build functions. Custom builders must be // provided. diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index 4ff77dc2c3f74..29438f1836a77 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -33,7 +33,9 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { AnyRegion:$someRegion, VariadicRegion:$someRegions ); - let builders = [OpBuilder<"Value val">]; + let builders = [OpBuilderDAG<(ins "Value":$val)>, + OpBuilderDAG<(ins CArg<"int", "0">:$integer)>, + OpBuilder<"double deprecatedForm">]; let parser = [{ foo }]; let printer = [{ bar }]; let verifier = [{ baz }]; @@ -81,6 +83,8 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: ::mlir::FloatAttr attr2Attr() // CHECK: ::llvm::Optional< ::llvm::APFloat > attr2(); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, Value val); +// CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, int integer = 0); +// CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, double deprecatedForm); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::TypeRange s, ::mlir::Value a, ::mlir::ValueRange b, ::mlir::IntegerAttr attr1, /*optional*/::mlir::FloatAttr attr2, unsigned someRegionsCount) // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::TypeRange s, ::mlir::Value a, ::mlir::ValueRange b, uint32_t attr1, /*optional*/::mlir::FloatAttr attr2, unsigned someRegionsCount) // CHECK: 
static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes, unsigned numRegions) @@ -250,7 +254,7 @@ def NS_JOp : NS_Op<"op_with_InferTypeOpInterface_interface", [DeclareOpInterface def NS_SkipDefaultBuildersOp : NS_Op<"skip_default_builders", []> { let skipDefaultBuilders = 1; - let builders = [OpBuilder<"Value val">]; + let builders = [OpBuilderDAG<(ins "Value":$val)>]; } // CHECK-LABEL: NS::SkipDefaultBuildersOp declarations diff --git a/mlir/test/mlir-tblgen/op-error.td b/mlir/test/mlir-tblgen/op-error.td new file mode 100644 index 0000000000000..b5fea66287a97 --- /dev/null +++ b/mlir/test/mlir-tblgen/op-error.td @@ -0,0 +1,36 @@ +// RUN: not mlir-tblgen -gen-op-decls -I %S/../../include -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s +// RUN: not mlir-tblgen -gen-op-decls -I %S/../../include -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s +// RUN: not mlir-tblgen -gen-op-decls -I %S/../../include -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s + +include "mlir/IR/OpBase.td" + +def Test_Dialect : Dialect { + let name = "test_dialect"; +} + +#ifdef ERROR1 +// ERROR1: error: expected 'ins' +def OpInsMissing : Op { + let builders = [ + OpBuilderDAG<(outs)> + ]; +} +#endif + +#ifdef ERROR2 +// ERROR2: error: expected an argument with default value after other arguments with default values +def OpDefaultValueNotTrailing : Op { + let builders = [ + OpBuilderDAG<(ins CArg<"int", "42">, "int")> + ]; +} +#endif + +#ifdef ERROR3 +// ERROR3: error: expected an argument with default value after other arguments with default values +def OpDefaultValueNotTrailing : Op { + let builders = [ + OpBuilderDAG<(ins CArg<"int", "42">, CArg<"int">)> + ]; +} +#endif diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 3bcf021145556..f296b32849079 100644 --- 
a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1144,6 +1144,82 @@ void OpEmitter::genUseAttrAsResultTypeBuilder() { body << " }\n"; } +/// Returns a signature of the builder as defined by a dag-typed initializer. +/// Updates the context `fctx` to enable replacement of $_builder and $_state +/// in the body. Reports errors at `loc`. +static std::string builderSignatureFromDAG(const DagInit *init, + ArrayRef loc, + FmtContext &fctx) { + auto *defInit = dyn_cast(init->getOperator()); + if (!defInit || !defInit->getDef()->getName().equals("ins")) + PrintFatalError(loc, "expected 'ins' in builders"); + + // Inject builder and state arguments. + llvm::SmallVector arguments; + arguments.reserve(init->getNumArgs() + 2); + arguments.push_back(llvm::formatv("::mlir::OpBuilder &{0}", builder).str()); + arguments.push_back( + llvm::formatv("::mlir::OperationState &{0}", builderOpState).str()); + + // Accept either a StringInit or a DefInit with two string values as dag + // arguments. The former corresponds to the type, the latter to the type and + // the default value. Similarly to C++, once an argument with a default value + // is detected, the following arguments must have default values as well. + bool seenDefaultValue = false; + for (unsigned i = 0, e = init->getNumArgs(); i < e; ++i) { + // If no name is provided, generate one. + StringInit *argName = init->getArgName(i); + std::string name = + argName ? 
argName->getValue().str() : "odsArg" + std::to_string(i); + + Init *argInit = init->getArg(i); + StringRef type; + std::string defaultValue; + if (StringInit *strType = dyn_cast(argInit)) { + type = strType->getValue(); + } else { + const Record *typeAndDefaultValue = cast(argInit)->getDef(); + type = typeAndDefaultValue->getValueAsString("type"); + StringRef defaultValueRef = + typeAndDefaultValue->getValueAsString("defaultValue"); + if (!defaultValueRef.empty()) { + seenDefaultValue = true; + defaultValue = llvm::formatv(" = {0}", defaultValueRef).str(); + } + } + if (seenDefaultValue && defaultValue.empty()) + PrintFatalError(loc, + "expected an argument with default value after other " + "arguments with default values"); + arguments.push_back( + llvm::formatv("{0} {1}{2}", type, name, defaultValue).str()); + } + + fctx.withBuilder(builder); + fctx.addSubst("_state", builderOpState); + + return llvm::join(arguments, ", "); +} + +// Returns a signature fo the builder as defined by a string initializer, +// optionally injecting the builder and state arguments. +// TODO: to be removed after the transition is complete. +static std::string builderSignatureFromString(StringRef params, + FmtContext &fctx) { + bool skipParamGen = params.startswith("OpBuilder") || + params.startswith("mlir::OpBuilder") || + params.startswith("::mlir::OpBuilder"); + if (skipParamGen) + return params.str(); + + fctx.withBuilder(builder); + fctx.addSubst("_state", builderOpState); + return std::string(llvm::formatv("::mlir::OpBuilder &{0}, " + "::mlir::OperationState &{1}{2}{3}", + builder, builderOpState, + params.empty() ? "" : ", ", params)); +} + void OpEmitter::genBuilder() { // Handle custom builders if provided. 
// TODO: Create wrapper class for OpBuilder to hide the native @@ -1153,35 +1229,23 @@ void OpEmitter::genBuilder() { if (listInit) { for (Init *init : listInit->getValues()) { Record *builderDef = cast(init)->getDef(); - StringRef params = builderDef->getValueAsString("params").trim(); - // TODO: Remove this and just generate the builder/state always. - bool skipParamGen = params.startswith("OpBuilder") || - params.startswith("mlir::OpBuilder") || - params.startswith("::mlir::OpBuilder"); + llvm::Optional params = + builderDef->getValueAsOptionalString("params"); + FmtContext fctx; + std::string paramStr = + params.hasValue() ? builderSignatureFromString(params->trim(), fctx) + : builderSignatureFromDAG( + builderDef->getValueAsDag("dagParams"), + op.getLoc(), fctx); + StringRef body = builderDef->getValueAsString("body"); bool hasBody = !body.empty(); - OpMethod::Property properties = hasBody ? OpMethod::MP_Static : OpMethod::MP_StaticDeclaration; - std::string paramStr = - skipParamGen ? params.str() - : llvm::formatv("::mlir::OpBuilder &{0}, " - "::mlir::OperationState &{1}{2}{3}", - builder, builderOpState, - params.empty() ? "" : ", ", params) - .str(); auto *method = opClass.addMethodAndPrune("void", "build", properties, paramStr); - if (hasBody) { - if (skipParamGen) { - method->body() << body; - } else { - FmtContext fctx; - fctx.withBuilder(builder); - fctx.addSubst("_state", builderOpState); - method->body() << tgfmt(body, &fctx); - } - } + if (hasBody) + method->body() << tgfmt(body, &fctx); } } if (op.skipDefaultBuilders()) { From e86a70ce3defd7df85ac13879da815025ffa50a3 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Wed, 21 Oct 2020 10:53:28 +0100 Subject: [PATCH 026/179] [InstructionSimplify] And precommit more tests for D89317. NFC. 
--- llvm/test/Transforms/InstSimplify/compare.ll | 39 ++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index e0817f17df401..dab1937adc145 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -1797,6 +1797,45 @@ define i1 @icmp_nsw_2(i32 %V) { ret i1 %cmp } +define i1 @icmp_nsw_commute(i32 %V) { +; CHECK-LABEL: @icmp_nsw_commute( +; CHECK-NEXT: [[ADD5:%.*]] = add i32 5, [[V:%.*]] +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[V]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i32 5, %V + %add6 = add nsw i32 %V, 6 + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_commute2(i32 %V) { +; CHECK-LABEL: @icmp_nsw_commute2( +; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[V:%.*]], 5 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 6, [[V]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i32 %V, 5 + %add6 = add nsw i32 6, %V + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + +define i1 @icmp_nsw_commute3(i32 %V) { +; CHECK-LABEL: @icmp_nsw_commute3( +; CHECK-NEXT: [[ADD5:%.*]] = add i32 5, [[V:%.*]] +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 6, [[V]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD5]], [[ADD6]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add5 = add i32 5, %V + %add6 = add nsw i32 6, %V + %cmp = icmp slt i32 %add5, %add6 + ret i1 %cmp +} + define i1 @icmp_nsw_22(i32 %V) { ; CHECK-LABEL: @icmp_nsw_22( ; CHECK-NEXT: ret i1 true From a6336eab0c507b665dda63fa9db1292a2cdee409 Mon Sep 17 00:00:00 2001 From: Adam Balogh Date: Thu, 15 Oct 2020 15:07:48 +0200 Subject: [PATCH 027/179] [ADT] Fix for ImmutableMapRef The `Root` member of `ImmutableMapRef` was changed recently from a plain pointer to `IntrusiveRefCntPtr`. However, the `Profile` member function was not adjusted. 
This results in compilation error whenever the `Profile` method is used on an `ImmutableMapRef`. This patch fixes this issue and also adds unit tests for `ImmutableMapRef`. Differential Revision: https://reviews.llvm.org/D89486 --- llvm/include/llvm/ADT/ImmutableMap.h | 2 +- llvm/unittests/ADT/ImmutableMapTest.cpp | 43 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h index 30689d2274a87..81b21a7319a71 100644 --- a/llvm/include/llvm/ADT/ImmutableMap.h +++ b/llvm/include/llvm/ADT/ImmutableMap.h @@ -355,7 +355,7 @@ class ImmutableMapRef { unsigned getHeight() const { return Root ? Root->getHeight() : 0; } static inline void Profile(FoldingSetNodeID &ID, const ImmutableMapRef &M) { - ID.AddPointer(M.Root); + ID.AddPointer(M.Root.get()); } inline void Profile(FoldingSetNodeID &ID) const { return Profile(ID, *this); } diff --git a/llvm/unittests/ADT/ImmutableMapTest.cpp b/llvm/unittests/ADT/ImmutableMapTest.cpp index fa61816d213cf..4541ea94742a9 100644 --- a/llvm/unittests/ADT/ImmutableMapTest.cpp +++ b/llvm/unittests/ADT/ImmutableMapTest.cpp @@ -46,4 +46,47 @@ TEST(ImmutableMapTest, MultiElemIntMapTest) { EXPECT_EQ(3U, S2.getHeight()); } +TEST(ImmutableMapTest, EmptyIntMapRefTest) { + using int_int_map = ImmutableMapRef; + ImmutableMapRef::FactoryTy *f = + new ImmutableMapRef::FactoryTy(); + + EXPECT_TRUE(int_int_map::getEmptyMap(f) == int_int_map::getEmptyMap(f)); + EXPECT_FALSE(int_int_map::getEmptyMap(f) != int_int_map::getEmptyMap(f)); + EXPECT_TRUE(int_int_map::getEmptyMap(f).isEmpty()); + + int_int_map S = int_int_map::getEmptyMap(f); + EXPECT_EQ(0u, S.getHeight()); + EXPECT_TRUE(S.begin() == S.end()); + EXPECT_FALSE(S.begin() != S.end()); +} + +TEST(ImmutableMapTest, MultiElemIntMapRefTest) { + ImmutableMapRef::FactoryTy *f = + new ImmutableMapRef::FactoryTy(); + + ImmutableMapRef S = ImmutableMapRef::getEmptyMap(f); + + ImmutableMapRef S2 = S.add(3, 
10).add(4, 11).add(5, 12); + + EXPECT_TRUE(S.isEmpty()); + EXPECT_FALSE(S2.isEmpty()); + + EXPECT_EQ(nullptr, S.lookup(3)); + EXPECT_EQ(nullptr, S.lookup(9)); + + EXPECT_EQ(10, *S2.lookup(3)); + EXPECT_EQ(11, *S2.lookup(4)); + EXPECT_EQ(12, *S2.lookup(5)); + + EXPECT_EQ(5, S2.getMaxElement()->first); + EXPECT_EQ(3U, S2.getHeight()); +} + + TEST(ImmutableMapTest, MapOfMapRefsTest) { + ImmutableMap>::Factory f; + + EXPECT_TRUE(f.getEmptyMap() == f.getEmptyMap()); + } + } From 0c66606230df39e0bf4190f1fc2c2e2fb37a81ea Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 21 Oct 2020 11:18:04 +0100 Subject: [PATCH 028/179] [Driver] Incorporate -mfloat-abi in the computed triple on ARM LLVM assumes that when it creates a call to a C library function it can use the C calling convention. On ARM the effective calling convention is determined from the target triple, however using -mfloat-abi=hard on ARM means that calls to (and definitions of) C library functions use the arm_aapcs_vfpcc calling convention which can result in a mismatch. Fix this by incorporating -mfloat-abi into the target triple, similar to how -mbig-endian and -march/-mcpu are. This only works for EABI targets and not Android or iOS, but there the float abi is fixed so instead give an error. 
Fixes PR45524 Differential Revision: https://reviews.llvm.org/D89573 --- clang/lib/Driver/ToolChain.cpp | 31 +++++ clang/lib/Driver/ToolChains/Arch/ARM.cpp | 159 +++++++++++------------ clang/lib/Driver/ToolChains/Arch/ARM.h | 1 + clang/test/Driver/arm-float-abi-lto.c | 63 +++++++++ clang/test/Driver/arm-float-abi.c | 24 +++- clang/test/Driver/arm-triple.c | 48 +++++++ clang/test/Driver/windows-thumbv7em.cpp | 4 +- 7 files changed, 243 insertions(+), 87 deletions(-) create mode 100644 clang/test/Driver/arm-float-abi-lto.c create mode 100644 clang/test/Driver/arm-triple.c diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8991216da6765..9fa53ce2dbeb6 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -787,6 +787,37 @@ std::string ToolChain::ComputeLLVMTriple(const ArgList &Args, } Triple.setArchName(ArchName + Suffix.str()); + bool isHardFloat = + (arm::getARMFloatABI(getDriver(), Triple, Args) == arm::FloatABI::Hard); + switch (Triple.getEnvironment()) { + case Triple::GNUEABI: + case Triple::GNUEABIHF: + Triple.setEnvironment(isHardFloat ? Triple::GNUEABIHF : Triple::GNUEABI); + break; + case Triple::EABI: + case Triple::EABIHF: + Triple.setEnvironment(isHardFloat ? Triple::EABIHF : Triple::EABI); + break; + case Triple::MuslEABI: + case Triple::MuslEABIHF: + Triple.setEnvironment(isHardFloat ? 
Triple::MuslEABIHF + : Triple::MuslEABI); + break; + default: { + arm::FloatABI DefaultABI = arm::getDefaultFloatABI(Triple); + if (DefaultABI != arm::FloatABI::Invalid && + isHardFloat != (DefaultABI == arm::FloatABI::Hard)) { + Arg *ABIArg = + Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, + options::OPT_mfloat_abi_EQ); + assert(ABIArg && "Non-default float abi expected to be from arg"); + D.Diag(diag::err_drv_unsupported_opt_for_target) + << ABIArg->getAsString(Args) << Triple.getTriple(); + } + break; + } + } + return Triple.getTriple(); } } diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index d74d5db0c083d..309a7298300f2 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -134,6 +134,7 @@ bool arm::useAAPCSForMachO(const llvm::Triple &T) { // The backend is hardwired to assume AAPCS for M-class processors, ensure // the frontend matches that. return T.getEnvironment() == llvm::Triple::EABI || + T.getEnvironment() == llvm::Triple::EABIHF || T.getOS() == llvm::Triple::UnknownOS || isARMMProfile(T); } @@ -160,11 +161,73 @@ arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) { return arm::getARMFloatABI(TC.getDriver(), TC.getEffectiveTriple(), Args); } +arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) { + auto SubArch = getARMSubArchVersionNumber(Triple); + switch (Triple.getOS()) { + case llvm::Triple::Darwin: + case llvm::Triple::MacOSX: + case llvm::Triple::IOS: + case llvm::Triple::TvOS: + // Darwin defaults to "softfp" for v6 and v7. + if (Triple.isWatchABI()) + return FloatABI::Hard; + else + return (SubArch == 6 || SubArch == 7) ? 
FloatABI::SoftFP : FloatABI::Soft; + + case llvm::Triple::WatchOS: + return FloatABI::Hard; + + // FIXME: this is invalid for WindowsCE + case llvm::Triple::Win32: + return FloatABI::Hard; + + case llvm::Triple::NetBSD: + switch (Triple.getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + return FloatABI::Hard; + default: + return FloatABI::Soft; + } + break; + + case llvm::Triple::FreeBSD: + switch (Triple.getEnvironment()) { + case llvm::Triple::GNUEABIHF: + return FloatABI::Hard; + default: + // FreeBSD defaults to soft float + return FloatABI::Soft; + } + break; + + case llvm::Triple::OpenBSD: + return FloatABI::SoftFP; + + default: + switch (Triple.getEnvironment()) { + case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABIHF: + case llvm::Triple::EABIHF: + return FloatABI::Hard; + case llvm::Triple::GNUEABI: + case llvm::Triple::MuslEABI: + case llvm::Triple::EABI: + // EABI is always AAPCS, and if it was not marked 'hard', it's softfp + return FloatABI::SoftFP; + case llvm::Triple::Android: + return (SubArch >= 7) ? FloatABI::SoftFP : FloatABI::Soft; + default: + return FloatABI::Invalid; + } + } + return FloatABI::Invalid; +} + // Select the float ABI as determined by -msoft-float, -mhard-float, and // -mfloat-abi=. arm::FloatABI arm::getARMFloatABI(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) { - auto SubArch = getARMSubArchVersionNumber(Triple); arm::FloatABI ABI = FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, @@ -184,95 +247,23 @@ arm::FloatABI arm::getARMFloatABI(const Driver &D, const llvm::Triple &Triple, ABI = FloatABI::Soft; } } - - // It is incorrect to select hard float ABI on MachO platforms if the ABI is - // "apcs-gnu". 
- if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple) && - ABI == FloatABI::Hard) { - D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) - << Triple.getArchName(); - } } // If unspecified, choose the default based on the platform. - if (ABI == FloatABI::Invalid) { - switch (Triple.getOS()) { - case llvm::Triple::Darwin: - case llvm::Triple::MacOSX: - case llvm::Triple::IOS: - case llvm::Triple::TvOS: { - // Darwin defaults to "softfp" for v6 and v7. - ABI = (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; - ABI = Triple.isWatchABI() ? FloatABI::Hard : ABI; - break; - } - case llvm::Triple::WatchOS: - ABI = FloatABI::Hard; - break; + if (ABI == FloatABI::Invalid) + ABI = arm::getDefaultFloatABI(Triple); - // FIXME: this is invalid for WindowsCE - case llvm::Triple::Win32: + if (ABI == FloatABI::Invalid) { + // Assume "soft", but warn the user we are guessing. + if (Triple.isOSBinFormatMachO() && + Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) ABI = FloatABI::Hard; - break; - - case llvm::Triple::NetBSD: - switch (Triple.getEnvironment()) { - case llvm::Triple::EABIHF: - case llvm::Triple::GNUEABIHF: - ABI = FloatABI::Hard; - break; - default: - ABI = FloatABI::Soft; - break; - } - break; - - case llvm::Triple::FreeBSD: - switch (Triple.getEnvironment()) { - case llvm::Triple::GNUEABIHF: - ABI = FloatABI::Hard; - break; - default: - // FreeBSD defaults to soft float - ABI = FloatABI::Soft; - break; - } - break; - - case llvm::Triple::OpenBSD: - ABI = FloatABI::SoftFP; - break; + else + ABI = FloatABI::Soft; - default: - switch (Triple.getEnvironment()) { - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABIHF: - case llvm::Triple::EABIHF: - ABI = FloatABI::Hard; - break; - case llvm::Triple::GNUEABI: - case llvm::Triple::MuslEABI: - case llvm::Triple::EABI: - // EABI is always AAPCS, and if it was not marked 'hard', it's softfp - ABI = FloatABI::SoftFP; - break; - case llvm::Triple::Android: - ABI = 
(SubArch >= 7) ? FloatABI::SoftFP : FloatABI::Soft; - break; - default: - // Assume "soft", but warn the user we are guessing. - if (Triple.isOSBinFormatMachO() && - Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) - ABI = FloatABI::Hard; - else - ABI = FloatABI::Soft; - - if (Triple.getOS() != llvm::Triple::UnknownOS || - !Triple.isOSBinFormatMachO()) - D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; - break; - } - } + if (Triple.getOS() != llvm::Triple::UnknownOS || + !Triple.isOSBinFormatMachO()) + D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; } assert(ABI != FloatABI::Invalid && "must select an ABI"); diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.h b/clang/lib/Driver/ToolChains/Arch/ARM.h index 0ba1a59852aa0..091c09b160ae8 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.h +++ b/clang/lib/Driver/ToolChains/Arch/ARM.h @@ -47,6 +47,7 @@ enum class FloatABI { Hard, }; +FloatABI getDefaultFloatABI(const llvm::Triple &Triple); FloatABI getARMFloatABI(const ToolChain &TC, const llvm::opt::ArgList &Args); FloatABI getARMFloatABI(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); diff --git a/clang/test/Driver/arm-float-abi-lto.c b/clang/test/Driver/arm-float-abi-lto.c new file mode 100644 index 0000000000000..83c2435d97a4d --- /dev/null +++ b/clang/test/Driver/arm-float-abi-lto.c @@ -0,0 +1,63 @@ +// REQUIRES: arm-registered-target + +// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m33 -mfloat-abi=hard -O1 %s -S -o - -emit-llvm -DCALL_LIB -DDEFINE_LIB | FileCheck %s + +// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m33 -mfloat-abi=hard -O1 %s -flto=full -c -o %t.call_full.bc -DCALL_LIB +// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m33 -mfloat-abi=hard -O1 %s -flto=full -c -o %t.define_full.bc -DDEFINE_LIB +// RUN: llvm-lto2 run -o %t.lto_full -save-temps %t.call_full.bc %t.define_full.bc \ +// RUN: -r %t.call_full.bc,fn,px \ +// RUN: -r %t.call_full.bc,fwrite,l \ +// RUN: -r 
%t.call_full.bc,putchar,l \ +// RUN: -r %t.call_full.bc,stdout,px \ +// RUN: -r %t.define_full.bc,fwrite,px \ +// RUN: -r %t.define_full.bc,putchar,px \ +// RUN: -r %t.define_full.bc,otherfn,px +// RUN: llvm-dis %t.lto_full.0.4.opt.bc -o - | FileCheck %s + +// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m33 -mfloat-abi=hard -O1 %s -flto=thin -c -o %t.call_thin.bc -DCALL_LIB +// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m33 -mfloat-abi=hard -O1 %s -flto=thin -c -o %t.define_thin.bc -DDEFINE_LIB +// RUN: llvm-lto2 run -o %t.lto_thin -save-temps %t.call_thin.bc %t.define_thin.bc \ +// RUN: -r %t.call_thin.bc,fn,px \ +// RUN: -r %t.call_thin.bc,fwrite,l \ +// RUN: -r %t.call_thin.bc,putchar,l \ +// RUN: -r %t.call_thin.bc,stdout,px \ +// RUN: -r %t.define_thin.bc,fwrite,px \ +// RUN: -r %t.define_thin.bc,putchar,px \ +// RUN: -r %t.define_thin.bc,otherfn,px +// RUN: llvm-dis %t.lto_thin.1.4.opt.bc -o - | FileCheck %s + +// We expect that the fprintf is optimised to fwrite, and the printf is +// optimised to putchar. Check that we don't have a mismatch in calling +// conventions causing the call to be replaced by a trap. 
+// CHECK-LABEL: define{{.*}}void @fn() +// CHECK-NOT: call void @llvm.trap() + +typedef struct FILE FILE; +typedef unsigned int size_t; +extern FILE *stdout; +extern int fprintf(FILE *, const char *, ...); +extern int printf(const char *, ...); +extern void otherfn(const void *); + +#ifdef CALL_LIB + +void fn() { + fprintf(stdout, "hello world"); + printf("a"); +} + +#endif + +#ifdef DEFINE_LIB + +size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { + otherfn(ptr); + return 0; +} + +int putchar(int c) { + otherfn(&c); + return 0; +} + +#endif diff --git a/clang/test/Driver/arm-float-abi.c b/clang/test/Driver/arm-float-abi.c index 9a76d1ee39c45..74ba3fd3bc579 100644 --- a/clang/test/Driver/arm-float-abi.c +++ b/clang/test/Driver/arm-float-abi.c @@ -2,7 +2,7 @@ // RUN: %clang %s -target armv7-apple-ios -mfloat-abi=softfp -### 2>&1 | FileCheck -check-prefix=NOERROR %s // RUN: %clang %s -arch armv7 -target thumbv7-apple-darwin-eabi -mfloat-abi=hard -### 2>&1 | FileCheck -check-prefix=NOERROR %s -// ARMV7-ERROR: unsupported option '-mfloat-abi=hard' for target 'thumbv7' +// ARMV7-ERROR: unsupported option '-mfloat-abi=hard' for target 'thumbv7-apple-ios' // NOERROR-NOT: unsupported option // RUN: %clang -target armv7-linux-androideabi21 %s -### -c 2>&1 \ @@ -14,3 +14,25 @@ // RUN: | FileCheck --check-prefix=CHECK-ARM8-ANDROID %s // CHECK-ARM8-ANDROID-NOT: "-target-feature" "+soft-float" // CHECK-ARM8-ANDROID: "-target-feature" "+soft-float-abi" + +// RUN: not %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=hard 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ANDROID-ERROR %s +// CHECK-ANDROID-ERROR: unsupported option '-mfloat-abi=hard' for target 'armv7-unknown-linux-android21' + +// RUN: %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=soft 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ANDROID-NOERROR %s +// RUN: %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=softfp 2>&1 \ +// RUN: | FileCheck 
--check-prefix=CHECK-ANDROID-NOERROR %s +// CHECK-ANDROID-NOERROR-NOT: unsupported option + +// RUN: not %clang -target armv7-apple-watchos4 %s -S -o - -mfloat-abi=soft 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WATCHOS-ERROR1 %s +// CHECK-WATCHOS-ERROR1: unsupported option '-mfloat-abi=soft' for target 'thumbv7-apple-watchos4' + +// RUN: not %clang -target armv7-apple-watchos4 %s -S -o - -mfloat-abi=softfp 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WATCHOS-ERROR2 %s +// CHECK-WATCHOS-ERROR2: unsupported option '-mfloat-abi=softfp' for target 'thumbv7-apple-watchos4' + +// RUN: %clang -target armv7-apple-watchos4 %s -S -o - -mfloat-abi=hard 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WATCHOS-NOERROR %s +// CHECK-WATCHOS-NOERROR-NOT: unsupported option diff --git a/clang/test/Driver/arm-triple.c b/clang/test/Driver/arm-triple.c new file mode 100644 index 0000000000000..fa9f7b189c827 --- /dev/null +++ b/clang/test/Driver/arm-triple.c @@ -0,0 +1,48 @@ +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabi \ +// RUN: | FileCheck %s --check-prefix=CHECK-DEFAULT +// RUN: %clang -print-effective-triple \ +// RUN: --target=armeb-none-eabi -mlittle-endian \ +// RUN: | FileCheck %s --check-prefix=CHECK-DEFAULT +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabihf -march=armv4t -mfloat-abi=softfp \ +// RUN: | FileCheck %s --check-prefix=CHECK-DEFAULT +// CHECK-DEFAULT: armv4t-none-unknown-eabi + +// RUN: %clang -print-effective-triple \ +// RUN: --target=armeb-none-eabi \ +// RUN: | FileCheck %s --check-prefix=CHECK-EB +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabi -mbig-endian \ +// RUN: | FileCheck %s --check-prefix=CHECK-EB +// CHECK-EB: armebv4t-none-unknown-eabi + +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabihf -march=armv4t \ +// RUN: | FileCheck %s --check-prefix=CHECK-HF +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabi 
-mfloat-abi=hard \ +// RUN: | FileCheck %s --check-prefix=CHECK-HF +// CHECK-HF: armv4t-none-unknown-eabihf + +// RUN: %clang -print-effective-triple \ +// RUN: --target=armeb-none-eabihf -march=armv4t \ +// RUN: | FileCheck %s --check-prefix=CHECK-EB-HF +// RUN: %clang -print-effective-triple \ +// RUN: --target=armeb-none-eabi -mfloat-abi=hard \ +// RUN: | FileCheck %s --check-prefix=CHECK-EB-HF +// RUN: %clang -print-effective-triple -march=armv4t \ +// RUN: --target=arm-none-eabihf -mbig-endian \ +// RUN: | FileCheck %s --check-prefix=CHECK-EB-HF +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabi -mbig-endian -mfloat-abi=hard \ +// RUN: | FileCheck %s --check-prefix=CHECK-EB-HF +// CHECK-EB-HF: armebv4t-none-unknown-eabihf + +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabi -march=armv8m.main -mbig-endian -mfloat-abi=hard \ +// RUN: | FileCheck %s --check-prefix=CHECK-V8M-EB-HF +// RUN: %clang -print-effective-triple \ +// RUN: --target=arm-none-eabi -mcpu=cortex-m33 -mbig-endian -mfloat-abi=hard \ +// RUN: | FileCheck %s --check-prefix=CHECK-V8M-EB-HF +// CHECK-V8M-EB-HF: thumbebv8m.main-none-unknown-eabihf diff --git a/clang/test/Driver/windows-thumbv7em.cpp b/clang/test/Driver/windows-thumbv7em.cpp index 5d7c00b31fd16..94d18e4a9b83b 100644 --- a/clang/test/Driver/windows-thumbv7em.cpp +++ b/clang/test/Driver/windows-thumbv7em.cpp @@ -1,8 +1,8 @@ // RUN: %clang -target thumb-none-windows-eabi-coff -mcpu=cortex-m7 -### -c %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-V7 -// CHECK-V7-NOT: error: the target architecture 'thumbv7em' is not supported by the target 'thumbv7em-none-windows-eabi' +// CHECK-V7-NOT: error: the target architecture 'thumbv7em' is not supported by the target 'thumbv7em-none-windows-eabihf' // RUN: %clang -target thumb-none-windows-eabi-coff -mcpu=cortex-m1 -### -c %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-V6 -// CHECK-V6: error: the target architecture 'thumbv6m' is not 
supported by the target 'thumbv6m-none-windows-eabi' +// CHECK-V6: error: the target architecture 'thumbv6m' is not supported by the target 'thumbv6m-none-windows-eabihf' From 5290f50e44b632c22800df15c691394e768796be Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Tue, 20 Oct 2020 18:23:24 +0200 Subject: [PATCH 029/179] [AMDGPU] Fix off by one in assert D89217 did not subtract one when accessing SubRegFromChannelTable in one place. Differential Revision: https://reviews.llvm.org/D89804 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index c7a0121b058be..6000c8a2a7fdc 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -98,9 +98,10 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) Width = SubRegFromChannelTableWidthMap[Width]; if (Width == 0) continue; - assert((Width - 1) < SubRegFromChannelTable.size()); - assert(Offset < SubRegFromChannelTable[Width].size()); - SubRegFromChannelTable[Width - 1][Offset] = Idx; + unsigned TableIdx = Width - 1; + assert(TableIdx < SubRegFromChannelTable.size()); + assert(Offset < SubRegFromChannelTable[TableIdx].size()); + SubRegFromChannelTable[TableIdx][Offset] = Idx; } }; From 1af51f077b003253ff50567022efd1e850b2ec54 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 21 Oct 2020 10:02:50 +0100 Subject: [PATCH 030/179] [TargetLowering] Add test for bit comparison fold This adds a test covering an issue in bit comparison folding. The issue will be addressed in the subsequent commit. Patch by Erik Hogeman. 
Differential Revision: https://reviews.llvm.org/D89390 --- llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll diff --git a/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll b/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll new file mode 100644 index 0000000000000..13e35d940528e --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=nvptx -verify-machineinstrs < %s | FileCheck %s + +; Tests the following pattern: +; (X & 8) != 0 --> (X & 8) >> 3 + +; CHECK-LABEL: @pow2_mask_cmp +; CHECK: bfe.u32 {{%r[0-9]+}}, {{%r[0-9]+}}, 3, 1 +define i32 @pow2_mask_cmp(i32 %x) { + %a = and i32 %x, 8 + %cmp = icmp ne i32 %a, 0 + %r = zext i1 %cmp to i32 + ret i32 %r +} From bfc961aeb2d0e5a05bca7a894cbc4370f5e79a6a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 21 Oct 2020 11:46:55 +0100 Subject: [PATCH 031/179] [TargetLowering] Check boolean content when folding bit compare Updates an optimization that relies on boolean contents being either 0 or 1 to properly check for this before triggering. The following: (X & 8) != 0 --> (X & 8) >> 3 Produces unexpected results when a boolean 'true' value is represented by negative one. Patch by Erik Hogeman. Differential Revision: https://reviews.llvm.org/D89390 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 ++- llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3e6c11a0c1299..bc81d0d8298cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3981,7 +3981,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. 
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + if (getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent && + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { diff --git a/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll b/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll index 13e35d940528e..45701df28a30f 100644 --- a/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll +++ b/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll @@ -3,8 +3,14 @@ ; Tests the following pattern: ; (X & 8) != 0 --> (X & 8) >> 3 +; This produces incorrect code when boolean false is represented +; as a negative one, and this test checks that the transform is +; not triggered. + ; CHECK-LABEL: @pow2_mask_cmp -; CHECK: bfe.u32 {{%r[0-9]+}}, {{%r[0-9]+}}, 3, 1 +; CHECK: and.b32 [[AND:%r[0-9]+]], %r{{[0-9]+}}, 8 +; CHECK: setp.ne.s32 [[SETP:%p[0-9+]]], [[AND]], 0 +; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, [[SETP]] define i32 @pow2_mask_cmp(i32 %x) { %a = and i32 %x, 8 %cmp = icmp ne i32 %a, 0 From 9a2d2bedb73058f22f4fea30fec14df8281638f5 Mon Sep 17 00:00:00 2001 From: Nicholas Guy Date: Mon, 28 Sep 2020 16:49:41 +0100 Subject: [PATCH 032/179] Add "SkipDead" parameter to TargetInstrInfo::DefinesPredicate Some instructions may be removable through processes such as IfConversion, however DefinesPredicate can not be made aware of when this should be considered. This parameter allows DefinesPredicate to distinguish these removable instructions on a per-call basis, allowing for more fine-grained control from processes like ifConversion. 
Renames DefinesPredicate to ClobbersPredicate, to better reflect it's purpose Differential Revision: https://reviews.llvm.org/D88494 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 9 +++++++-- llvm/lib/CodeGen/IfConversion.cpp | 4 ++-- llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 5 +++-- llvm/lib/Target/AMDGPU/R600InstrInfo.h | 4 ++-- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 5 +++-- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 4 ++-- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 5 +++-- llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 4 ++-- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 5 +++-- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 4 ++-- 10 files changed, 29 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 96cca0257782b..5d5a6efab2207 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1399,8 +1399,13 @@ class TargetInstrInfo : public MCInstrInfo { /// If the specified instruction defines any predicate /// or condition code register(s) used for predication, returns true as well /// as the definition predicate(s) by reference. - virtual bool DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const { + /// SkipDead should be set to false at any point that dead + /// predicate instructions should be considered as being defined. + /// A dead predicate instruction is one that is guaranteed to be removed + /// after a call to PredicateInstruction. + virtual bool ClobbersPredicate(MachineInstr &MI, + std::vector &Pred, + bool SkipDead) const { return false; } diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 1a5c5d6850172..d149f8c3a139e 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -751,7 +751,7 @@ bool IfConverter::CountDuplicatedInstructions( // A pred-clobbering instruction in the shared portion prevents // if-conversion. 
std::vector PredDefs; - if (TII->DefinesPredicate(*TIB, PredDefs)) + if (TII->ClobbersPredicate(*TIB, PredDefs, false)) return false; // If we get all the way to the branch instructions, don't count them. if (!TIB->isBranch()) @@ -1146,7 +1146,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI, // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are // still potentially predicable. std::vector PredDefs; - if (TII->DefinesPredicate(MI, PredDefs)) + if (TII->ClobbersPredicate(MI, PredDefs, true)) BBI.ClobbersPred = true; if (!TII->isPredicable(MI)) { diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index cf5791f39e562..e3439e6c9e884 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -963,8 +963,9 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl &Cond) con return false; } -bool R600InstrInfo::DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const { +bool R600InstrInfo::ClobbersPredicate(MachineInstr &MI, + std::vector &Pred, + bool SkipDead) const { return isPredicateSetter(MI.getOpcode()); } diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index 873ee08470cb3..1e249c6348f14 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -194,8 +194,8 @@ class R600InstrInfo final : public R600GenInstrInfo { unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) const override; - bool DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const override; + bool ClobbersPredicate(MachineInstr &MI, std::vector &Pred, + bool SkipDead) const override; bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index d81c8efa1597d..3822f9057d949 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ 
b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -589,8 +589,9 @@ bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef Pred1, } } -bool ARMBaseInstrInfo::DefinesPredicate( - MachineInstr &MI, std::vector &Pred) const { +bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, + std::vector &Pred, + bool SkipDead) const { bool Found = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 5bf6e880056de..f997322107afb 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -171,8 +171,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { bool SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const override; - bool DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const override; + bool ClobbersPredicate(MachineInstr &MI, std::vector &Pred, + bool SkipDead) const override; bool isPredicable(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 93215a4b61870..26fc093d15a76 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1639,8 +1639,9 @@ bool HexagonInstrInfo::SubsumesPredicate(ArrayRef Pred1, return false; } -bool HexagonInstrInfo::DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const { +bool HexagonInstrInfo::ClobbersPredicate(MachineInstr &MI, + std::vector &Pred, + bool SkipDead) const { const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) { diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 847b9a6728916..11717996935d2 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -238,8 +238,8 @@ class HexagonInstrInfo : public 
HexagonGenInstrInfo { /// If the specified instruction defines any predicate /// or condition code register(s) used for predication, returns true as well /// as the definition predicate(s) by reference. - bool DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const override; + bool ClobbersPredicate(MachineInstr &MI, std::vector &Pred, + bool SkipDead) const override; /// Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index fd83b5b6d4b8c..487bf925ac999 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1802,8 +1802,9 @@ bool PPCInstrInfo::SubsumesPredicate(ArrayRef Pred1, return false; } -bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const { +bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, + std::vector &Pred, + bool SkipDead) const { // Note: At the present time, the contents of Pred from this function is // unused by IfConversion. This implementation follows ARM by pushing the // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index e3e87022e97c3..79b7bd4a1d38b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -472,8 +472,8 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const override; - bool DefinesPredicate(MachineInstr &MI, - std::vector &Pred) const override; + bool ClobbersPredicate(MachineInstr &MI, std::vector &Pred, + bool SkipDead) const override; // Comparison optimization. 
From 88523f6f4bd10287455f058b386306e11a518b3d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 21 Oct 2020 11:08:25 +0100 Subject: [PATCH 033/179] [DAG] getNode(ISD::EXTRACT_SUBVECTOR) Drop unnecessary N2C null check - we assert that this isn't null and have already used the pointer. NFCI. Fixes cppcheck + null dereference warning. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b1e2679d86dc6..24c32d367283a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5588,8 +5588,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of // the concat have the same type as the extract. - if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) { + if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 && + VT == N1.getOperand(0).getValueType()) { unsigned Factor = VT.getVectorMinNumElements(); return N1.getOperand(N2C->getZExtValue() / Factor); } From 7b4a828452f4f59adf1561697b853e78efd006a9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 21 Oct 2020 11:53:25 +0100 Subject: [PATCH 034/179] [InstCombine] foldOrOfICmps - use m_Specific instead of explicit comparisons. NFCI. 
--- .../lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b34ba4e7908f3..21a0924497341 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2301,15 +2301,15 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, LHSC->getType() == RHSC->getType() && LHSC->getValue() == (RHSC->getValue())) { - Value *LAddOpnd, *RAddOpnd; + Value *AddOpnd; ConstantInt *LAddC, *RAddC; - if (match(LHS0, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddC))) && - match(RHS0, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddC))) && + if (match(LHS0, m_Add(m_Value(AddOpnd), m_ConstantInt(LAddC))) && + match(RHS0, m_Add(m_Specific(AddOpnd), m_ConstantInt(RAddC))) && LAddC->getValue().ugt(LHSC->getValue()) && RAddC->getValue().ugt(LHSC->getValue())) { APInt DiffC = LAddC->getValue() ^ RAddC->getValue(); - if (LAddOpnd == RAddOpnd && DiffC.isPowerOf2()) { + if (DiffC.isPowerOf2()) { ConstantInt *MaxAddC = nullptr; if (LAddC->getValue().ult(RAddC->getValue())) MaxAddC = RAddC; @@ -2329,7 +2329,7 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, RangeDiff.ugt(LHSC->getValue())) { Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); - Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC); + Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC); Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } From c50f0d239dc367b087ef7b06d413ae1df0750758 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 21 Oct 2020 12:59:50 +0100 Subject: [PATCH 035/179] [Clang] Update newpm pipeline test in clang after D87322. This fixes a test failure because a LLVM pipeline test file in clang/ did not get updated in 88241ffb5636. 
--- clang/test/CodeGen/thinlto-distributed-newpm.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll index de758e3758b69..3056bf45877ac 100644 --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -129,13 +129,13 @@ ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Running pass: JumpThreadingPass on main ; CHECK-O: Running pass: CorrelatedValuePropagationPass on main -; CHECK-O: Running pass: DSEPass on main +; CHECK-O: Running pass: ADCEPass on main ; CHECK-O: Running analysis: PostDominatorTreeAnalysis on main +; CHECK-O: Running pass: DSEPass on main ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running pass: LCSSAPass on main ; CHECK-O: Finished {{.*}}Function pass manager run. -; CHECK-O: Running pass: ADCEPass on main ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Finished {{.*}}Function pass manager run. 
From 87f6de72bcd346bbbf468e9f9a0e9d1bbf0630a9 Mon Sep 17 00:00:00 2001 From: David Zarzycki Date: Wed, 21 Oct 2020 08:07:26 -0400 Subject: [PATCH 036/179] [clang testing] Fix a read-only source build system failure --- clang/test/CodeGen/basic-block-sections.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/basic-block-sections.c b/clang/test/CodeGen/basic-block-sections.c index 3d68394bb78eb..2eefbbfd21e19 100644 --- a/clang/test/CodeGen/basic-block-sections.c +++ b/clang/test/CodeGen/basic-block-sections.c @@ -6,7 +6,7 @@ // RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_ALL // RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=list=%S/Inputs/basic-block-sections.funcnames -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_LIST // RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -funique-basic-block-section-names -o - < %s | FileCheck %s --check-prefix=UNIQUE -// RUN: not %clang_cc1 -fbasic-block-sections=list= -emit-obj %s 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: not %clang_cc1 -fbasic-block-sections=list= -emit-obj -o - %s 2>&1 | FileCheck %s --check-prefix=ERROR int world(int a) { if (a > 10) From 9f5ece63ce62253321a8e8cdd3e052b5b5270b8e Mon Sep 17 00:00:00 2001 From: Evgeny Leviant Date: Wed, 21 Oct 2020 15:09:26 +0300 Subject: [PATCH 037/179] [llvm-mca] Add test for cortex-a57 memory instructions --- .../ARM/cortex-a57-memory-instructions.s | 436 ++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100644 llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s diff --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s new file mode 100644 index 0000000000000..a0c99f77720e8 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s @@ -0,0 +1,436 @@ +# NOTE: Assertions have been 
autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=armv8 -mcpu=cortex-a57 -instruction-tables < %s | FileCheck %s + + .text + ldr r5, [r7] + ldr r6, [r3, #63] + ldr r2, [r4, #4095]! + ldr r1, [r2], #30 + ldr r3, [r1], #-30 + ldr r3, [r8, r1] + ldr r2, [r5, -r3] + ldr r1, [r5, r9]! + ldr r6, [r7, -r8]! + ldr r1, [r0, r2, lsr #3]! + ldr r5, [r9], r2 + ldr r4, [r3], -r6 + ldr r3, [r8, -r2, lsl #15] + ldr r1, [r5], r3, asr #15 + ldrb r3, [r8] + ldrb r1, [sp, #63] + ldrb r9, [r3, #4095]! + ldrb r8, [r1], #22 + ldrb r2, [r7], #-19 + ldrb r9, [r8, r5] + ldrb r1, [r5, -r1] + ldrb r3, [r5, r2]! + ldrb r6, [r9, -r3]! + ldrb r2, [r1], r4 + ldrb r8, [r4], -r5 + ldrb r7, [r12, -r1, lsl #15] + ldrb r5, [r2], r9, asr #15 + ldrbt r3, [r1], #4 + ldrbt r2, [r8], #-8 + ldrbt r8, [r7], r6 + ldrbt r1, [r2], -r6, lsl #12 + ldrd r0, r1, [r5] + ldrd r8, r9, [r2, #15] + ldrd r2, r3, [r9, #32]! + ldrd r6, r7, [r1], #8 + ldrd r2, r3, [r8], #0 + ldrd r2, r3, [r8], #0 + ldrd r2, r3, [r8], #-0 + ldrd r4, r5, [r1, r3] + ldrd r4, r5, [r7, r2]! + ldrd r0, r1, [r8], r12 + ldrd r0, r1, [r8], -r12 + ldrh r3, [r4] + ldrh r2, [r7, #4] + ldrh r1, [r8, #64]! + ldrh r12, [sp], #4 + ldrh r6, [r5, r4] + ldrh r3, [r8, r11]! + ldrh r1, [r2, -r1]! + ldrh r9, [r7], r2 + ldrh r4, [r3], -r2 + ldrht r9, [r7], #128 + ldrht r4, [r3], #-75 + ldrht r9, [r7], r2 + ldrht r4, [r3], -r2 + ldrsb r3, [r4] + ldrsb r2, [r7, #17] + ldrsb r1, [r8, #255]! + ldrsb r12, [sp], #9 + ldrsb r6, [r5, r4] + ldrsb r3, [r8, r11]! + ldrsb r1, [r2, -r1]! + ldrsb r9, [r7], r2 + ldrsb r4, [r3], -r2 + ldrsbt r5, [r6], #1 + ldrsbt r3, [r8], #-12 + ldrsbt r8, [r9], r5 + ldrsbt r2, [r1], -r4 + ldrsh r5, [r9] + ldrsh r4, [r5, #7] + ldrsh r3, [r6, #55]! + ldrsh r2, [r7], #-9 + ldrsh r3, [r1, r5] + ldrsh r4, [r6, r1]! + ldrsh r5, [r3, -r6]! 
+ ldrsh r6, [r9], r8 + ldrsh r7, [r8], -r3 + ldrsht r5, [r6], #1 + ldrsht r3, [r8], #-12 + ldrsht r8, [r9], r5 + ldrsht r2, [r1], -r4 + str r8, [r12] + str r7, [r1, #12] + str r3, [r5, #40]! + str r9, [sp], #4095 + str r1, [r7], #-128 + str r9, [r6, r3] + str r8, [r0, -r2] + str r7, [r1, r6]! + str r6, [sp, -r1]! + str r5, [r3], r9 + str r4, [r2], -r5 + str r3, [r4, -r2, lsl #2] + str r2, [r7], r3, asr #24 + strb r9, [r2] + strb r7, [r1, #3] + strb r6, [r4, #405]! + strb r5, [r7], #72 + strb r1, [sp], #-1 + strb r1, [r2, r9] + strb r2, [r3, -r8] + strb r3, [r4, r7]! + strb r4, [r5, -r6]! + strb r5, [r6], r5 + strb r6, [r2], -r4 + strb r7, [r12, -r3, lsl #5] + strb sp, [r7], r2, asr #12 + strbt r6, [r2], #12 + strbt r5, [r6], #-13 + strbt r4, [r9], r5 + strbt r3, [r8], -r2, lsl #3 + strd r0, r1, [r4] + strd r2, r3, [r6, #1] + strd r2, r3, [r7, #22]! + strd r4, r5, [r8], #7 + strd r4, r5, [sp], #0 + strd r6, r7, [lr], #0 + strd r6, r7, [r9], #-0 + strd r8, r9, [r4, r1] + strd r6, r7, [r3, r9]! + strd r6, r7, [r5], r8 + strd r4, r5, [r12], -r10 + strh r3, [r4] + strh r2, [r7, #4] + strh r1, [r8, #64]! + strh r12, [sp], #4 + strh r6, [r5, r4] + strh r3, [r8, r11]! + strh r1, [r2, -r1]! + strh r9, [r7], r2 + strh r4, [r3], -r2 + strht r2, [r5], #76 + strht r8, [r1], #-25 + strht r5, [r3], r4 + strht r6, [r8], -r0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 1.00 * ldr r5, [r7] +# CHECK-NEXT: 1 4 1.00 * ldr r6, [r3, #63] +# CHECK-NEXT: 2 4 1.00 * ldr r2, [r4, #4095]! +# CHECK-NEXT: 2 4 1.00 * ldr r1, [r2], #30 +# CHECK-NEXT: 2 4 1.00 * ldr r3, [r1], #-30 +# CHECK-NEXT: 1 4 1.00 * ldr r3, [r8, r1] +# CHECK-NEXT: 1 4 1.00 * ldr r2, [r5, -r3] +# CHECK-NEXT: 2 4 1.00 * ldr r1, [r5, r9]! +# CHECK-NEXT: 2 4 1.00 * ldr r6, [r7, -r8]! 
+# CHECK-NEXT: 2 4 1.00 * ldr r1, [r0, r2, lsr #3]! +# CHECK-NEXT: 2 4 1.00 * ldr r5, [r9], r2 +# CHECK-NEXT: 2 4 1.00 * ldr r4, [r3], -r6 +# CHECK-NEXT: 1 4 1.00 * ldr r3, [r8, -r2, lsl #15] +# CHECK-NEXT: 2 4 1.00 * ldr r1, [r5], r3, asr #15 +# CHECK-NEXT: 1 4 1.00 * ldrb r3, [r8] +# CHECK-NEXT: 1 4 1.00 * ldrb r1, [sp, #63] +# CHECK-NEXT: 2 4 1.00 * ldrb r9, [r3, #4095]! +# CHECK-NEXT: 2 4 1.00 * ldrb r8, [r1], #22 +# CHECK-NEXT: 2 4 1.00 * ldrb r2, [r7], #-19 +# CHECK-NEXT: 1 4 1.00 * ldrb r9, [r8, r5] +# CHECK-NEXT: 1 4 1.00 * ldrb r1, [r5, -r1] +# CHECK-NEXT: 2 4 1.00 * ldrb r3, [r5, r2]! +# CHECK-NEXT: 2 4 1.00 * ldrb r6, [r9, -r3]! +# CHECK-NEXT: 2 4 1.00 * ldrb r2, [r1], r4 +# CHECK-NEXT: 2 4 1.00 * ldrb r8, [r4], -r5 +# CHECK-NEXT: 1 4 1.00 * ldrb r7, [r12, -r1, lsl #15] +# CHECK-NEXT: 2 4 1.00 * ldrb r5, [r2], r9, asr #15 +# CHECK-NEXT: 2 4 1.00 * ldrbt r3, [r1], #4 +# CHECK-NEXT: 2 4 1.00 * ldrbt r2, [r8], #-8 +# CHECK-NEXT: 2 4 1.00 * ldrbt r8, [r7], r6 +# CHECK-NEXT: 2 4 1.00 * ldrbt r1, [r2], -r6, lsl #12 +# CHECK-NEXT: 2 4 2.00 * ldrd r0, r1, [r5] +# CHECK-NEXT: 2 4 2.00 * ldrd r8, r9, [r2, #15] +# CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r9, #32]! +# CHECK-NEXT: 4 4 2.00 * ldrd r6, r7, [r1], #8 +# CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r8], #0 +# CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r8], #0 +# CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r8], #-0 +# CHECK-NEXT: 2 4 2.00 * ldrd r4, r5, [r1, r3] +# CHECK-NEXT: 4 4 2.00 * ldrd r4, r5, [r7, r2]! +# CHECK-NEXT: 4 4 2.00 * ldrd r0, r1, [r8], r12 +# CHECK-NEXT: 4 4 2.00 * ldrd r0, r1, [r8], -r12 +# CHECK-NEXT: 1 4 1.00 * ldrh r3, [r4] +# CHECK-NEXT: 1 4 1.00 * ldrh r2, [r7, #4] +# CHECK-NEXT: 1 4 1.00 * ldrh r1, [r8, #64]! +# CHECK-NEXT: 2 4 1.00 * ldrh r12, [sp], #4 +# CHECK-NEXT: 1 4 1.00 * ldrh r6, [r5, r4] +# CHECK-NEXT: 1 4 1.00 * ldrh r3, [r8, r11]! +# CHECK-NEXT: 1 4 1.00 * ldrh r1, [r2, -r1]! 
+# CHECK-NEXT: 2 4 1.00 * ldrh r9, [r7], r2 +# CHECK-NEXT: 2 4 1.00 * ldrh r4, [r3], -r2 +# CHECK-NEXT: 2 4 1.00 * ldrht r9, [r7], #128 +# CHECK-NEXT: 2 4 1.00 * ldrht r4, [r3], #-75 +# CHECK-NEXT: 2 4 1.00 * ldrht r9, [r7], r2 +# CHECK-NEXT: 2 4 1.00 * ldrht r4, [r3], -r2 +# CHECK-NEXT: 1 4 1.00 * ldrsb r3, [r4] +# CHECK-NEXT: 1 4 1.00 * ldrsb r2, [r7, #17] +# CHECK-NEXT: 1 4 1.00 * ldrsb r1, [r8, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrsb r12, [sp], #9 +# CHECK-NEXT: 1 4 1.00 * ldrsb r6, [r5, r4] +# CHECK-NEXT: 1 4 1.00 * ldrsb r3, [r8, r11]! +# CHECK-NEXT: 1 4 1.00 * ldrsb r1, [r2, -r1]! +# CHECK-NEXT: 2 4 1.00 * ldrsb r9, [r7], r2 +# CHECK-NEXT: 2 4 1.00 * ldrsb r4, [r3], -r2 +# CHECK-NEXT: 2 4 1.00 * ldrsbt r5, [r6], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsbt r3, [r8], #-12 +# CHECK-NEXT: 2 4 1.00 * ldrsbt r8, [r9], r5 +# CHECK-NEXT: 2 4 1.00 * ldrsbt r2, [r1], -r4 +# CHECK-NEXT: 1 4 1.00 * ldrsh r5, [r9] +# CHECK-NEXT: 1 4 1.00 * ldrsh r4, [r5, #7] +# CHECK-NEXT: 1 4 1.00 * ldrsh r3, [r6, #55]! +# CHECK-NEXT: 2 4 1.00 * ldrsh r2, [r7], #-9 +# CHECK-NEXT: 1 4 1.00 * ldrsh r3, [r1, r5] +# CHECK-NEXT: 1 4 1.00 * ldrsh r4, [r6, r1]! +# CHECK-NEXT: 1 4 1.00 * ldrsh r5, [r3, -r6]! +# CHECK-NEXT: 2 4 1.00 * ldrsh r6, [r9], r8 +# CHECK-NEXT: 2 4 1.00 * ldrsh r7, [r8], -r3 +# CHECK-NEXT: 2 4 1.00 * ldrsht r5, [r6], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsht r3, [r8], #-12 +# CHECK-NEXT: 2 4 1.00 * ldrsht r8, [r9], r5 +# CHECK-NEXT: 2 4 1.00 * ldrsht r2, [r1], -r4 +# CHECK-NEXT: 1 1 1.00 * str r8, [r12] +# CHECK-NEXT: 1 1 1.00 * str r7, [r1, #12] +# CHECK-NEXT: 2 1 1.00 * str r3, [r5, #40]! +# CHECK-NEXT: 2 1 1.00 * str r9, [sp], #4095 +# CHECK-NEXT: 2 1 1.00 * str r1, [r7], #-128 +# CHECK-NEXT: 1 1 1.00 * str r9, [r6, r3] +# CHECK-NEXT: 1 1 1.00 * str r8, [r0, -r2] +# CHECK-NEXT: 2 1 1.00 * str r7, [r1, r6]! +# CHECK-NEXT: 2 1 1.00 * str r6, [sp, -r1]! 
+# CHECK-NEXT: 2 2 1.00 * str r5, [r3], r9 +# CHECK-NEXT: 2 2 1.00 * str r4, [r2], -r5 +# CHECK-NEXT: 1 1 1.00 * str r3, [r4, -r2, lsl #2] +# CHECK-NEXT: 2 2 1.00 * str r2, [r7], r3, asr #24 +# CHECK-NEXT: 1 1 1.00 * strb r9, [r2] +# CHECK-NEXT: 1 1 1.00 * strb r7, [r1, #3] +# CHECK-NEXT: 2 1 1.00 * strb r6, [r4, #405]! +# CHECK-NEXT: 2 1 1.00 * strb r5, [r7], #72 +# CHECK-NEXT: 2 1 1.00 * strb r1, [sp], #-1 +# CHECK-NEXT: 1 1 1.00 * strb r1, [r2, r9] +# CHECK-NEXT: 1 1 1.00 * strb r2, [r3, -r8] +# CHECK-NEXT: 2 1 1.00 * strb r3, [r4, r7]! +# CHECK-NEXT: 2 1 1.00 * strb r4, [r5, -r6]! +# CHECK-NEXT: 2 2 1.00 * strb r5, [r6], r5 +# CHECK-NEXT: 2 2 1.00 * strb r6, [r2], -r4 +# CHECK-NEXT: 1 1 1.00 * strb r7, [r12, -r3, lsl #5] +# CHECK-NEXT: 2 2 1.00 * strb sp, [r7], r2, asr #12 +# CHECK-NEXT: 2 1 1.00 U strbt r6, [r2], #12 +# CHECK-NEXT: 2 1 1.00 U strbt r5, [r6], #-13 +# CHECK-NEXT: 2 2 1.00 U strbt r4, [r9], r5 +# CHECK-NEXT: 2 2 1.00 U strbt r3, [r8], -r2, lsl #3 +# CHECK-NEXT: 1 1 1.00 * strd r0, r1, [r4] +# CHECK-NEXT: 1 1 1.00 * strd r2, r3, [r6, #1] +# CHECK-NEXT: 2 1 1.00 * strd r2, r3, [r7, #22]! +# CHECK-NEXT: 2 1 1.00 * strd r4, r5, [r8], #7 +# CHECK-NEXT: 2 1 1.00 * strd r4, r5, [sp], #0 +# CHECK-NEXT: 2 1 1.00 * strd r6, r7, [lr], #0 +# CHECK-NEXT: 2 1 1.00 * strd r6, r7, [r9], #-0 +# CHECK-NEXT: 1 1 1.00 * strd r8, r9, [r4, r1] +# CHECK-NEXT: 2 1 1.00 * strd r6, r7, [r3, r9]! +# CHECK-NEXT: 2 1 1.00 * strd r6, r7, [r5], r8 +# CHECK-NEXT: 2 1 1.00 * strd r4, r5, [r12], -r10 +# CHECK-NEXT: 1 1 1.00 * strh r3, [r4] +# CHECK-NEXT: 1 1 1.00 * strh r2, [r7, #4] +# CHECK-NEXT: 2 1 1.00 U strh r1, [r8, #64]! +# CHECK-NEXT: 2 1 1.00 * strh r12, [sp], #4 +# CHECK-NEXT: 1 1 1.00 * strh r6, [r5, r4] +# CHECK-NEXT: 2 1 1.00 U strh r3, [r8, r11]! +# CHECK-NEXT: 2 1 1.00 U strh r1, [r2, -r1]! 
+# CHECK-NEXT: 2 1 1.00 * strh r9, [r7], r2 +# CHECK-NEXT: 2 1 1.00 * strh r4, [r3], -r2 +# CHECK-NEXT: 2 1 1.00 U strht r2, [r5], #76 +# CHECK-NEXT: 2 1 1.00 U strht r8, [r1], #-25 +# CHECK-NEXT: 2 1 1.00 U strht r5, [r3], r4 +# CHECK-NEXT: 2 1 1.00 U strht r6, [r8], -r0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - A57UnitB +# CHECK-NEXT: [1.0] - A57UnitI +# CHECK-NEXT: [1.1] - A57UnitI +# CHECK-NEXT: [2] - A57UnitL +# CHECK-NEXT: [3] - A57UnitM +# CHECK-NEXT: [4] - A57UnitS +# CHECK-NEXT: [5] - A57UnitW +# CHECK-NEXT: [6] - A57UnitX + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] +# CHECK-NEXT: - 44.00 44.00 92.00 8.00 54.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: - - - 1.00 - - - - ldr r5, [r7] +# CHECK-NEXT: - - - 1.00 - - - - ldr r6, [r3, #63] +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r2, [r4, #4095]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r1, [r2], #30 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r3, [r1], #-30 +# CHECK-NEXT: - - - 1.00 - - - - ldr r3, [r8, r1] +# CHECK-NEXT: - - - 1.00 - - - - ldr r2, [r5, -r3] +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r1, [r5, r9]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r6, [r7, -r8]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r1, [r0, r2, lsr #3]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r5, [r9], r2 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r4, [r3], -r6 +# CHECK-NEXT: - - - 1.00 - - - - ldr r3, [r8, -r2, lsl #15] +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldr r1, [r5], r3, asr #15 +# CHECK-NEXT: - - - 1.00 - - - - ldrb r3, [r8] +# CHECK-NEXT: - - - 1.00 - - - - ldrb r1, [sp, #63] +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r9, [r3, #4095]! 
+# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r8, [r1], #22 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r2, [r7], #-19 +# CHECK-NEXT: - - - 1.00 - - - - ldrb r9, [r8, r5] +# CHECK-NEXT: - - - 1.00 - - - - ldrb r1, [r5, -r1] +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r3, [r5, r2]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r6, [r9, -r3]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r2, [r1], r4 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r8, [r4], -r5 +# CHECK-NEXT: - - - 1.00 - - - - ldrb r7, [r12, -r1, lsl #15] +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrb r5, [r2], r9, asr #15 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrbt r3, [r1], #4 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrbt r2, [r8], #-8 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrbt r8, [r7], r6 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrbt r1, [r2], -r6, lsl #12 +# CHECK-NEXT: - - - 2.00 - - - - ldrd r0, r1, [r5] +# CHECK-NEXT: - - - 2.00 - - - - ldrd r8, r9, [r2, #15] +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r2, r3, [r9, #32]! +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r6, r7, [r1], #8 +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r2, r3, [r8], #0 +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r2, r3, [r8], #0 +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r2, r3, [r8], #-0 +# CHECK-NEXT: - - - 2.00 - - - - ldrd r4, r5, [r1, r3] +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r4, r5, [r7, r2]! +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r0, r1, [r8], r12 +# CHECK-NEXT: - 1.00 1.00 2.00 - - - - ldrd r0, r1, [r8], -r12 +# CHECK-NEXT: - - - 1.00 - - - - ldrh r3, [r4] +# CHECK-NEXT: - - - 1.00 - - - - ldrh r2, [r7, #4] +# CHECK-NEXT: - - - 1.00 - - - - ldrh r1, [r8, #64]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrh r12, [sp], #4 +# CHECK-NEXT: - - - 1.00 - - - - ldrh r6, [r5, r4] +# CHECK-NEXT: - - - 1.00 - - - - ldrh r3, [r8, r11]! +# CHECK-NEXT: - - - 1.00 - - - - ldrh r1, [r2, -r1]! 
+# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrh r9, [r7], r2 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrh r4, [r3], -r2 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrht r9, [r7], #128 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrht r4, [r3], #-75 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrht r9, [r7], r2 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrht r4, [r3], -r2 +# CHECK-NEXT: - - - 1.00 - - - - ldrsb r3, [r4] +# CHECK-NEXT: - - - 1.00 - - - - ldrsb r2, [r7, #17] +# CHECK-NEXT: - - - 1.00 - - - - ldrsb r1, [r8, #255]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsb r12, [sp], #9 +# CHECK-NEXT: - - - 1.00 - - - - ldrsb r6, [r5, r4] +# CHECK-NEXT: - - - 1.00 - - - - ldrsb r3, [r8, r11]! +# CHECK-NEXT: - - - 1.00 - - - - ldrsb r1, [r2, -r1]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsb r9, [r7], r2 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsb r4, [r3], -r2 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsbt r5, [r6], #1 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsbt r3, [r8], #-12 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsbt r8, [r9], r5 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsbt r2, [r1], -r4 +# CHECK-NEXT: - - - 1.00 - - - - ldrsh r5, [r9] +# CHECK-NEXT: - - - 1.00 - - - - ldrsh r4, [r5, #7] +# CHECK-NEXT: - - - 1.00 - - - - ldrsh r3, [r6, #55]! +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsh r2, [r7], #-9 +# CHECK-NEXT: - - - 1.00 - - - - ldrsh r3, [r1, r5] +# CHECK-NEXT: - - - 1.00 - - - - ldrsh r4, [r6, r1]! +# CHECK-NEXT: - - - 1.00 - - - - ldrsh r5, [r3, -r6]! 
+# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsh r6, [r9], r8 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsh r7, [r8], -r3 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsht r5, [r6], #1 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsht r3, [r8], #-12 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsht r8, [r9], r5 +# CHECK-NEXT: - 0.50 0.50 1.00 - - - - ldrsht r2, [r1], -r4 +# CHECK-NEXT: - - - - - 1.00 - - str r8, [r12] +# CHECK-NEXT: - - - - - 1.00 - - str r7, [r1, #12] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - str r3, [r5, #40]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - str r9, [sp], #4095 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - str r1, [r7], #-128 +# CHECK-NEXT: - - - - - 1.00 - - str r9, [r6, r3] +# CHECK-NEXT: - - - - - 1.00 - - str r8, [r0, -r2] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - str r7, [r1, r6]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - str r6, [sp, -r1]! +# CHECK-NEXT: - - - - 1.00 1.00 - - str r5, [r3], r9 +# CHECK-NEXT: - - - - 1.00 1.00 - - str r4, [r2], -r5 +# CHECK-NEXT: - - - - - 1.00 - - str r3, [r4, -r2, lsl #2] +# CHECK-NEXT: - - - - 1.00 1.00 - - str r2, [r7], r3, asr #24 +# CHECK-NEXT: - - - - - 1.00 - - strb r9, [r2] +# CHECK-NEXT: - - - - - 1.00 - - strb r7, [r1, #3] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strb r6, [r4, #405]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strb r5, [r7], #72 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strb r1, [sp], #-1 +# CHECK-NEXT: - - - - - 1.00 - - strb r1, [r2, r9] +# CHECK-NEXT: - - - - - 1.00 - - strb r2, [r3, -r8] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strb r3, [r4, r7]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strb r4, [r5, -r6]! 
+# CHECK-NEXT: - - - - 1.00 1.00 - - strb r5, [r6], r5 +# CHECK-NEXT: - - - - 1.00 1.00 - - strb r6, [r2], -r4 +# CHECK-NEXT: - - - - - 1.00 - - strb r7, [r12, -r3, lsl #5] +# CHECK-NEXT: - - - - 1.00 1.00 - - strb sp, [r7], r2, asr #12 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strbt r6, [r2], #12 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strbt r5, [r6], #-13 +# CHECK-NEXT: - - - - 1.00 1.00 - - strbt r4, [r9], r5 +# CHECK-NEXT: - - - - 1.00 1.00 - - strbt r3, [r8], -r2, lsl #3 +# CHECK-NEXT: - - - - - 1.00 - - strd r0, r1, [r4] +# CHECK-NEXT: - - - - - 1.00 - - strd r2, r3, [r6, #1] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r2, r3, [r7, #22]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r4, r5, [r8], #7 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r4, r5, [sp], #0 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r6, r7, [lr], #0 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r6, r7, [r9], #-0 +# CHECK-NEXT: - - - - - 1.00 - - strd r8, r9, [r4, r1] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r6, r7, [r3, r9]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r6, r7, [r5], r8 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strd r4, r5, [r12], -r10 +# CHECK-NEXT: - - - - - 1.00 - - strh r3, [r4] +# CHECK-NEXT: - - - - - 1.00 - - strh r2, [r7, #4] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strh r1, [r8, #64]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strh r12, [sp], #4 +# CHECK-NEXT: - - - - - 1.00 - - strh r6, [r5, r4] +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strh r3, [r8, r11]! +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strh r1, [r2, -r1]! 
+# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strh r9, [r7], r2 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strh r4, [r3], -r2 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strht r2, [r5], #76 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strht r8, [r1], #-25 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strht r5, [r3], r4 +# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - strht r6, [r8], -r0 From 96685faf6dd9b044394af6f7a9d8b10fadb327b5 Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Wed, 21 Oct 2020 14:18:36 +0200 Subject: [PATCH 038/179] [llvm] Use early exits and get rid of if-return-else-return pattern; NFC https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code Reviewed By: kadircet Differential Revision: https://reviews.llvm.org/D89857 --- llvm/lib/Support/Path.cpp | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index bbc02a246a50a..ef223ae5ac1d6 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -354,10 +354,9 @@ StringRef root_path(StringRef path, Style style) { if ((++pos != e) && is_separator((*pos)[0], style)) { // {C:/,//net/}, so get the first two components. return path.substr(0, b->size() + pos->size()); - } else { - // just {C:,//net}, return the first component. - return *b; } + // just {C:,//net}, return the first component. + return *b; } // POSIX style root directory. 
@@ -467,8 +466,7 @@ StringRef parent_path(StringRef path, Style style) { size_t end_pos = parent_path_end(path, style); if (end_pos == StringRef::npos) return StringRef(); - else - return path.substr(0, end_pos); + return path.substr(0, end_pos); } void remove_filename(SmallVectorImpl &path, Style style) { @@ -581,12 +579,10 @@ StringRef stem(StringRef path, Style style) { size_t pos = fname.find_last_of('.'); if (pos == StringRef::npos) return fname; - else - if ((fname.size() == 1 && fname == ".") || - (fname.size() == 2 && fname == "..")) - return fname; - else - return fname.substr(0, pos); + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return fname; + return fname.substr(0, pos); } StringRef extension(StringRef path, Style style) { @@ -594,12 +590,10 @@ StringRef extension(StringRef path, Style style) { size_t pos = fname.find_last_of('.'); if (pos == StringRef::npos) return StringRef(); - else - if ((fname.size() == 1 && fname == ".") || - (fname.size() == 2 && fname == "..")) - return StringRef(); - else - return fname.substr(pos); + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return StringRef(); + return fname.substr(pos); } bool is_separator(char value, Style style) { @@ -1299,7 +1293,7 @@ Expected TempFile::create(const Twine &Model, unsigned Mode) { #endif return std::move(Ret); } -} +} // namespace fs -} // end namsspace sys -} // end namespace llvm +} // namespace sys +} // namespace llvm From 81c0d36a1836c9be7c34a6d8198310ad7ea9bb53 Mon Sep 17 00:00:00 2001 From: Ben Dunbobbin Date: Wed, 21 Oct 2020 13:09:15 +0100 Subject: [PATCH 039/179] [LIT] error if directly named test won't be run indirectly Currently, a LIT test named directly (on the command line) will be run even if the name of the test file does not meet the rules to be considered a test in the LIT test configuration files for its test suite. For example, if the test does not have a recognised file extension. 
This makes it relatively easy to write a LIT test that won't actually be run. I did in: https://reviews.llvm.org/D82567 This patch adds an error to avoid users doing that. There is a small performance overhead for this check. A command line option has been added so that users can opt into the old behaviour. Differential Revision: https://reviews.llvm.org/D83069 --- llvm/utils/lit/lit/LitTestCase.py | 4 +- llvm/utils/lit/lit/cl_arguments.py | 6 +++ llvm/utils/lit/lit/discovery.py | 46 ++++++++++++++----- llvm/utils/lit/lit/main.py | 3 +- .../lit/tests/Inputs/discovery/test.not-txt | 1 + llvm/utils/lit/tests/discovery.py | 33 +++++++++---- llvm/utils/lit/tests/unit/TestRunner.py | 4 +- 7 files changed, 71 insertions(+), 26 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/discovery/test.not-txt diff --git a/llvm/utils/lit/lit/LitTestCase.py b/llvm/utils/lit/lit/LitTestCase.py index 951f7be958e2e..81ed61ab09643 100644 --- a/llvm/utils/lit/lit/LitTestCase.py +++ b/llvm/utils/lit/lit/LitTestCase.py @@ -55,8 +55,8 @@ def load_test_suite(inputs): params={}) # Perform test discovery. - tests = lit.discovery.find_tests_for_inputs(lit_config, inputs) + tests = lit.discovery.find_tests_for_inputs(lit_config, inputs, False) test_adaptors = [LitTestCase(t, lit_config) for t in tests] # Return a unittest test suite which just runs the tests in order. 
- return unittest.TestSuite(test_adaptors) + return unittest.TestSuite(test_adaptors) \ No newline at end of file diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index 69166e00aba8c..591d4f9aaafb5 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -123,6 +123,12 @@ def parse_args(): execution_group.add_argument("--allow-empty-runs", help="Do not fail the run if all tests are filtered out", action="store_true") + execution_group.add_argument("--no-indirectly-run-check", + dest="indirectlyRunCheck", + help="Do not error if a test would not be run if the user had " + "specified the containing directory instead of naming the " + "test directly.", + action="store_false") selection_group = parser.add_argument_group("Test Selection") selection_group.add_argument("--max-tests", diff --git a/llvm/utils/lit/lit/discovery.py b/llvm/utils/lit/lit/discovery.py index d8054543d018d..d83e90977e447 100644 --- a/llvm/utils/lit/lit/discovery.py +++ b/llvm/utils/lit/lit/discovery.py @@ -125,7 +125,8 @@ def search(path_in_suite): return search(path_in_suite) -def getTests(path, litConfig, testSuiteCache, localConfigCache): +def getTests(path, litConfig, testSuiteCache, + localConfigCache, indirectlyRunCheck): # Find the test suite for this input and its relative path. ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache) if ts is None: @@ -137,10 +138,10 @@ def getTests(path, litConfig, testSuiteCache, localConfigCache): path_in_suite)) return ts, getTestsInSuite(ts, path_in_suite, litConfig, - testSuiteCache, localConfigCache) + testSuiteCache, localConfigCache, indirectlyRunCheck) def getTestsInSuite(ts, path_in_suite, litConfig, - testSuiteCache, localConfigCache): + testSuiteCache, localConfigCache, indirectlyRunCheck): # Check that the source path exists (errors here are reported by the # caller). 
     source_path = ts.getSourcePath(path_in_suite)
@@ -149,8 +150,30 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
 
     # Check if the user named a test directly.
     if not os.path.isdir(source_path):
-        lc = getLocalConfig(ts, path_in_suite[:-1], litConfig, localConfigCache)
-        yield Test.Test(ts, path_in_suite, lc)
+        test_dir_in_suite = path_in_suite[:-1]
+        lc = getLocalConfig(ts, test_dir_in_suite, litConfig, localConfigCache)
+        test = Test.Test(ts, path_in_suite, lc)
+
+        # Issue an error if the specified test would not be run if
+        # the user had specified the containing directory instead
+        # of naming the test directly. This helps to avoid writing
+        # tests which are not executed. The check adds some performance
+        # overhead which might be important if a large number of tests
+        # are being run directly.
+        # --no-indirectly-run-check: skips this check.
+        if indirectlyRunCheck and lc.test_format is not None:
+            found = False
+            for res in lc.test_format.getTestsInDirectory(ts, test_dir_in_suite,
+                                                          litConfig, lc):
+                if test.getFullName() == res.getFullName():
+                    found = True
+                    break
+            if not found:
+                litConfig.error(
+                    '%r would not be run indirectly: change name or LIT config'
+                    % test.getFullName())
+
+        yield test
         return
 
     # Otherwise we have a directory to search for tests, start by getting the
@@ -196,10 +219,11 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
         # Otherwise, load from the nested test suite, if present.
if sub_ts is not None: subiter = getTestsInSuite(sub_ts, subpath_in_suite, litConfig, - testSuiteCache, localConfigCache) + testSuiteCache, localConfigCache, + indirectlyRunCheck) else: subiter = getTestsInSuite(ts, subpath, litConfig, testSuiteCache, - localConfigCache) + localConfigCache, indirectlyRunCheck) N = 0 for res in subiter: @@ -208,7 +232,7 @@ def getTestsInSuite(ts, path_in_suite, litConfig, if sub_ts and not N: litConfig.warning('test suite %r contained no tests' % sub_ts.name) -def find_tests_for_inputs(lit_config, inputs): +def find_tests_for_inputs(lit_config, inputs, indirectlyRunCheck): """ find_tests_for_inputs(lit_config, inputs) -> [Test] @@ -237,8 +261,8 @@ def find_tests_for_inputs(lit_config, inputs): local_config_cache = {} for input in actual_inputs: prev = len(tests) - tests.extend(getTests(input, lit_config, - test_suite_cache, local_config_cache)[1]) + tests.extend(getTests(input, lit_config, test_suite_cache, + local_config_cache, indirectlyRunCheck)[1]) if prev == len(tests): lit_config.warning('input %r contained no tests' % input) @@ -247,4 +271,4 @@ def find_tests_for_inputs(lit_config, inputs): sys.stderr.write('%d errors, exiting.\n' % lit_config.numErrors) sys.exit(2) - return tests + return tests \ No newline at end of file diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index d94d7280809da..6c9885c4d4be7 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -39,7 +39,8 @@ def main(builtin_params={}): config_prefix=opts.configPrefix, echo_all_commands=opts.echoAllCommands) - discovered_tests = lit.discovery.find_tests_for_inputs(lit_config, opts.test_paths) + discovered_tests = lit.discovery.find_tests_for_inputs(lit_config, opts.test_paths, + opts.indirectlyRunCheck) if not discovered_tests: sys.stderr.write('error: did not discover any tests for provided path(s)\n') sys.exit(2) diff --git a/llvm/utils/lit/tests/Inputs/discovery/test.not-txt 
b/llvm/utils/lit/tests/Inputs/discovery/test.not-txt new file mode 100644 index 0000000000000..b80b60b7a2794 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/discovery/test.not-txt @@ -0,0 +1 @@ +# RUN: true diff --git a/llvm/utils/lit/tests/discovery.py b/llvm/utils/lit/tests/discovery.py index 94b227f33fb3d..84ee2221efe72 100644 --- a/llvm/utils/lit/tests/discovery.py +++ b/llvm/utils/lit/tests/discovery.py @@ -51,17 +51,17 @@ # CHECK-CONFIG-MAP-ERR: resolved input '{{.*(/|\\\\)config-map-discovery(/|\\\\)main-config}}' to 'config-map'::() -# Check discovery when exact test names are given. +# Check discovery when tests are named directly. # # RUN: %{lit} \ # RUN: %{inputs}/discovery/subdir/test-three.py \ # RUN: %{inputs}/discovery/subsuite/test-one.txt \ # RUN: -j 1 --show-tests --show-suites -v > %t.out -# RUN: FileCheck --check-prefix=CHECK-EXACT-TEST < %t.out %s +# RUN: FileCheck --check-prefix=CHECK-DIRECT-TEST < %t.out %s # -# CHECK-EXACT-TEST: -- Available Tests -- -# CHECK-EXACT-TEST: sub-suite :: test-one -# CHECK-EXACT-TEST: top-level-suite :: subdir/test-three +# CHECK-DIRECT-TEST: -- Available Tests -- +# CHECK-DIRECT-TEST: sub-suite :: test-one +# CHECK-DIRECT-TEST: top-level-suite :: subdir/test-three # Check discovery when config files end in .py # RUN: %{lit} %{inputs}/py-config-discovery \ @@ -122,18 +122,31 @@ # CHECK-ASEXEC-OUT: top-level-suite :: test-one # CHECK-ASEXEC-OUT: top-level-suite :: test-two -# Check discovery when exact test names are given. +# Check discovery when tests are named directly. # # FIXME: Note that using a path into a subsuite doesn't work correctly here. 
# # RUN: %{lit} \ # RUN: %{inputs}/exec-discovery/subdir/test-three.py \ # RUN: -j 1 --show-tests --show-suites -v > %t.out -# RUN: FileCheck --check-prefix=CHECK-ASEXEC-EXACT-TEST < %t.out %s +# RUN: FileCheck --check-prefix=CHECK-ASEXEC-DIRECT-TEST < %t.out %s # -# CHECK-ASEXEC-EXACT-TEST: -- Available Tests -- -# CHECK-ASEXEC-EXACT-TEST: top-level-suite :: subdir/test-three +# CHECK-ASEXEC-DIRECT-TEST: -- Available Tests -- +# CHECK-ASEXEC-DIRECT-TEST: top-level-suite :: subdir/test-three +# Check an error is emitted when the directly named test would not be run +# indirectly (e.g. when the directory containing the test is specified). +# +# RUN: not %{lit} \ +# RUN: %{inputs}/discovery/test.not-txt -j 1 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-ERROR-INDIRECT-RUN-CHECK < %t.err %s +# +# CHECK-ERROR-INDIRECT-RUN-CHECK: error: 'top-level-suite :: test.not-txt' would not be run indirectly + +# Check that no error is emitted with --no-indirectly-run-check. +# +# RUN: %{lit} \ +# RUN: %{inputs}/discovery/test.not-txt -j 1 --no-indirectly-run-check # Check that we don't recurse infinitely when loading an site specific test # suite located inside the test source root. 
@@ -156,4 +169,4 @@ # CHECK-ASEXEC-INTREE-NEXT: Available Features: # CHECK-ASEXEC-INTREE-NEXT: Available Substitutions: # CHECK-ASEXEC-INTREE-NEXT: -- Available Tests -- -# CHECK-ASEXEC-INTREE-NEXT: exec-discovery-in-tree-suite :: test-one +# CHECK-ASEXEC-INTREE-NEXT: exec-discovery-in-tree-suite :: test-one \ No newline at end of file diff --git a/llvm/utils/lit/tests/unit/TestRunner.py b/llvm/utils/lit/tests/unit/TestRunner.py index 33e4ebc716f40..411146effb1c8 100644 --- a/llvm/utils/lit/tests/unit/TestRunner.py +++ b/llvm/utils/lit/tests/unit/TestRunner.py @@ -40,7 +40,7 @@ def load_keyword_parser_lit_tests(): test_path = os.path.dirname(os.path.dirname(__file__)) inputs = [os.path.join(test_path, 'Inputs/testrunner-custom-parsers/')] assert os.path.isdir(inputs[0]) - tests = lit.discovery.find_tests_for_inputs(lit_config, inputs) + tests = lit.discovery.find_tests_for_inputs(lit_config, inputs, False) assert len(tests) == 1 and "there should only be one test" TestIntegratedTestKeywordParser.inputTestCase = tests[0] @@ -291,4 +291,4 @@ def test_recursive_substitution_invalid_value(self): if __name__ == '__main__': TestIntegratedTestKeywordParser.load_keyword_parser_lit_tests() - unittest.main(verbosity=2) + unittest.main(verbosity=2) \ No newline at end of file From 6a5c19dd3956b6fe4607c8dd00a550bf3ead25b0 Mon Sep 17 00:00:00 2001 From: Ben Dunbobbin Date: Wed, 21 Oct 2020 13:29:58 +0100 Subject: [PATCH 040/179] [LIT] fixed up accidentally committed EOF problems. 
--- llvm/utils/lit/lit/LitTestCase.py | 2 +- llvm/utils/lit/lit/discovery.py | 2 +- llvm/utils/lit/tests/discovery.py | 2 +- llvm/utils/lit/tests/unit/TestRunner.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/utils/lit/lit/LitTestCase.py b/llvm/utils/lit/lit/LitTestCase.py index 81ed61ab09643..72d57689bf6a5 100644 --- a/llvm/utils/lit/lit/LitTestCase.py +++ b/llvm/utils/lit/lit/LitTestCase.py @@ -59,4 +59,4 @@ def load_test_suite(inputs): test_adaptors = [LitTestCase(t, lit_config) for t in tests] # Return a unittest test suite which just runs the tests in order. - return unittest.TestSuite(test_adaptors) \ No newline at end of file + return unittest.TestSuite(test_adaptors) diff --git a/llvm/utils/lit/lit/discovery.py b/llvm/utils/lit/lit/discovery.py index d83e90977e447..2f027a5b03fe4 100644 --- a/llvm/utils/lit/lit/discovery.py +++ b/llvm/utils/lit/lit/discovery.py @@ -271,4 +271,4 @@ def find_tests_for_inputs(lit_config, inputs, indirectlyRunCheck): sys.stderr.write('%d errors, exiting.\n' % lit_config.numErrors) sys.exit(2) - return tests \ No newline at end of file + return tests diff --git a/llvm/utils/lit/tests/discovery.py b/llvm/utils/lit/tests/discovery.py index 84ee2221efe72..cb04edaab04c2 100644 --- a/llvm/utils/lit/tests/discovery.py +++ b/llvm/utils/lit/tests/discovery.py @@ -169,4 +169,4 @@ # CHECK-ASEXEC-INTREE-NEXT: Available Features: # CHECK-ASEXEC-INTREE-NEXT: Available Substitutions: # CHECK-ASEXEC-INTREE-NEXT: -- Available Tests -- -# CHECK-ASEXEC-INTREE-NEXT: exec-discovery-in-tree-suite :: test-one \ No newline at end of file +# CHECK-ASEXEC-INTREE-NEXT: exec-discovery-in-tree-suite :: test-one diff --git a/llvm/utils/lit/tests/unit/TestRunner.py b/llvm/utils/lit/tests/unit/TestRunner.py index 411146effb1c8..b4eb1293f3f62 100644 --- a/llvm/utils/lit/tests/unit/TestRunner.py +++ b/llvm/utils/lit/tests/unit/TestRunner.py @@ -291,4 +291,4 @@ def test_recursive_substitution_invalid_value(self): if __name__ == 
'__main__': TestIntegratedTestKeywordParser.load_keyword_parser_lit_tests() - unittest.main(verbosity=2) \ No newline at end of file + unittest.main(verbosity=2) From 1606755da0e4fb362c88fdf02373f0dc3e4eef8f Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 15 Oct 2020 10:04:06 +0200 Subject: [PATCH 041/179] [SystemZ] Mark unsaved argument R6 as live throughout function. For historical reasons, the R6 register is a callee-saved argument register. This means that if it is used to pass an argument to a function that does not clobber it, it is live throughout the function. This patch makes sure that in this special case any kill flags of it are removed. Review: Ulrich Weigand, Eli Friedman Differential Revision: https://reviews.llvm.org/D89451 --- .../Target/SystemZ/SystemZFrameLowering.cpp | 10 + llvm/test/CodeGen/SystemZ/frame-26.mir | 199 ++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 llvm/test/CodeGen/SystemZ/frame-26.mir diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 985722fdcab4a..3e114e17f85c1 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -316,6 +316,8 @@ void SystemZFrameLowering:: processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); bool BackChain = MF.getFunction().hasFnAttribute("backchain"); if (!usePackedStack(MF) || BackChain) @@ -344,6 +346,14 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF, RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false)); RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false)); } + + // If R6 is used as an argument register it is still callee saved. 
If it in + // this case is not clobbered (and restored) it should never be marked as + // killed. + if (MF.front().isLiveIn(SystemZ::R6D) && + ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D) + for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D)) + MO.setIsKill(false); } // Emit instructions before MBBI (in MBB) to add NumBytes to Reg. diff --git a/llvm/test/CodeGen/SystemZ/frame-26.mir b/llvm/test/CodeGen/SystemZ/frame-26.mir new file mode 100644 index 0000000000000..e4ff1bd3dcb60 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/frame-26.mir @@ -0,0 +1,199 @@ +# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - -print-after=prologepilog \ +# RUN: -verify-machineinstrs 2>&1 | FileCheck %s +# REQUIRES: asserts +# +# Test that R6 when used for an argument is modelled as being live throughout +# the function when not saved in the prologue.. + +# CHECK: # *** IR Dump After Prologue/Epilogue Insertion & Frame Finalization ***: +# CHECK-NEXT: # Machine code for function fun0: +# CHECK-LABEL: bb.0: +# CHECK: liveins:{{.*}} $r6d +# CHECK: STMG killed $r7d, killed $r15d +# CHECK: STG renamable $r6d +# CHECK: End machine code for function fun0. + + +--- | + + @g_181 = external dso_local global i32, align 4 + @g_1390 = external dso_local constant i64*, align 8 + + define internal i8 @fun0(i8 %arg, i8 %arg1, i32 %arg2, i8 %arg3, i32* %arg4, float %F0, float %F1) #0 { + ret i8 0 + } + + ; Same function but in a single block which will make the verifier complain + ; if R6 is killed by the original store before the point where the + ; RegScavenger inserts its (killing) store of R6. + define internal i8 @fun1(i8 %arg, i8 %arg1, i32 %arg2, i8 %arg3, i32* %arg4) #0 { + ret i8 0 + } + + attributes #0 = { "frame-pointer"="all" } + +... 
+--- +name: fun0 +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$r6d' } + - { reg: '$f0s' } + - { reg: '$f2s' } +frameInfo: + maxAlignment: 8 +stack: + - { id: 0, size: 96, alignment: 8 } + - { id: 1, size: 1960, alignment: 8 } + - { id: 2, size: 8, alignment: 8 } + - { id: 3, size: 320, alignment: 8 } + - { id: 4, size: 4, alignment: 4 } + - { id: 5, size: 8, alignment: 8 } + - { id: 6, size: 8, alignment: 8 } + - { id: 7, size: 4, alignment: 4 } + - { id: 8, size: 8, alignment: 8 } + - { id: 9, size: 4, alignment: 4 } + - { id: 10, size: 8, alignment: 8 } + - { id: 11, size: 8, alignment: 8 } + - { id: 12, size: 8, alignment: 8 } + - { id: 13, size: 8, alignment: 8 } + - { id: 14, size: 24, alignment: 4 } + - { id: 15, size: 4, alignment: 4 } + - { id: 16, size: 1792, alignment: 8 } + - { id: 17, size: 8, alignment: 8 } + - { id: 18, size: 8, alignment: 8 } + - { id: 19, size: 8, alignment: 8 } + - { id: 20, size: 1, alignment: 2 } + - { id: 21, size: 672, alignment: 8 } + - { id: 22, size: 4, alignment: 4 } + - { id: 23, size: 4, alignment: 4 } + - { id: 24, size: 4, alignment: 4 } + - { id: 25, size: 64, alignment: 8 } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $f0s, $f2s, $r6d + + STG killed renamable $r6d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + renamable $r0d = LARL @g_181 + nofpexcept CEBR renamable $f0s, renamable $f2s, implicit-def $cc, implicit $fpc + STG renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + BRC 15, 4, %bb.2, implicit killed $cc + + bb.1: + liveins: $f2s, $r0d + + renamable $f0s = COPY killed renamable $f2s + + bb.2: + liveins: $f0s, $r0d + + STE killed renamable $f0s, undef renamable $r1d, 0, $noreg :: (volatile store 4 into `float* undef`) + renamable $r1d = nuw LA %stack.0, 16, $noreg + renamable $r2d = nuw LA %stack.0, 24, $noreg + renamable $r3d = LA %stack.0, 40, $noreg + renamable $r4d = LARL @g_1390 + STG renamable $r4d, undef renamable $r1d, 0, $noreg 
:: (store 8 into `i64*** undef`) + renamable $r5d = nuw LA %stack.0, 48, $noreg + renamable $r14d = LA %stack.0, 72, $noreg + renamable $r13d = LA %stack.0, 80, $noreg + renamable $r12d = LA %stack.0, 56, $noreg + renamable $r10d = LA %stack.0, 0, $noreg + STG renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + renamable $r9d = LA %stack.0, 64, $noreg + renamable $r8d = LA %stack.0, 88, $noreg + renamable $r7d = nuw LA %stack.0, 8, $noreg + MVGHI %stack.1, 904, 0 + STG killed renamable $r9d, $noreg, 0, $noreg :: (store 8 into `i64*** null`) + STG killed renamable $r3d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r14d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r7d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r1d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r2d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r5d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r8d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r12d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r13d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + $r2l = LHI 0 + STG killed renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + Return implicit $r2l + +... 
+--- +name: fun1 +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$r6d' } +frameInfo: + maxAlignment: 8 +stack: + - { id: 0, size: 96, alignment: 8 } + - { id: 1, size: 1960, alignment: 8 } + - { id: 2, size: 8, alignment: 8 } + - { id: 3, size: 320, alignment: 8 } + - { id: 4, size: 4, alignment: 4 } + - { id: 5, size: 8, alignment: 8 } + - { id: 6, size: 8, alignment: 8 } + - { id: 7, size: 4, alignment: 4 } + - { id: 8, size: 8, alignment: 8 } + - { id: 9, size: 4, alignment: 4 } + - { id: 10, size: 8, alignment: 8 } + - { id: 11, size: 8, alignment: 8 } + - { id: 12, size: 8, alignment: 8 } + - { id: 13, size: 8, alignment: 8 } + - { id: 14, size: 24, alignment: 4 } + - { id: 15, size: 4, alignment: 4 } + - { id: 16, size: 1792, alignment: 8 } + - { id: 17, size: 8, alignment: 8 } + - { id: 18, size: 8, alignment: 8 } + - { id: 19, size: 8, alignment: 8 } + - { id: 20, size: 1, alignment: 2 } + - { id: 21, size: 672, alignment: 8 } + - { id: 22, size: 4, alignment: 4 } + - { id: 23, size: 4, alignment: 4 } + - { id: 24, size: 4, alignment: 4 } + - { id: 25, size: 64, alignment: 8 } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r6d + + STG killed renamable $r6d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + renamable $r0d = LARL @g_181 + STG renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + renamable $r1d = nuw LA %stack.0, 16, $noreg + renamable $r2d = nuw LA %stack.0, 24, $noreg + renamable $r3d = LA %stack.0, 40, $noreg + renamable $r4d = LARL @g_1390 + STG renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + renamable $r5d = nuw LA %stack.0, 48, $noreg + renamable $r14d = LA %stack.0, 72, $noreg + renamable $r13d = LA %stack.0, 80, $noreg + renamable $r12d = LA %stack.0, 56, $noreg + renamable $r10d = LA %stack.0, 0, $noreg + STG renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + renamable $r9d = LA %stack.0, 64, $noreg + 
renamable $r8d = LA %stack.0, 88, $noreg + renamable $r7d = nuw LA %stack.0, 8, $noreg + MVGHI %stack.1, 904, 0 + STG killed renamable $r9d, $noreg, 0, $noreg :: (store 8 into `i64*** null`) + STG killed renamable $r3d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r14d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r7d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r1d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r2d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r5d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r8d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r12d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r13d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + $r2l = LHI 0 + STG killed renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + Return implicit $r2l + +... From 1e46d1aa3f9b618e8db32af5c855fbc386a950aa Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 26 Jun 2020 12:08:59 -0400 Subject: [PATCH 042/179] [libc++] Include <__config_site> from <__config> Prior to this patch, we would generate a fancy <__config> header by concatenating <__config_site> and <__config>. This complexifies the build system and also increases the difference between what's tested and what's actually installed. This patch removes that complexity and instead simply installs <__config_site> alongside the libc++ headers. <__config_site> is then included by <__config>, which is much simpler. 
Doing this also opens the door to having different <__config_site> headers depending on the target, which was impossible before. It does change the workflow for testing header-only changes to libc++. Previously, we would run `lit` against the headers in libcxx/include. After this patch, we run it against a fake installation root of the headers (containing a proper <__config_site> header). This makes use closer to testing what we actually install, which is good, however it does mean that we have to update that root before testing header changes. Thus, we now need to run `ninja check-cxx-deps` before running `lit` by hand. Differential Revision: https://reviews.llvm.org/D89041 --- libcxx/CMakeLists.txt | 1 + libcxx/docs/TestingLibcxx.rst | 10 ++++-- libcxx/include/CMakeLists.txt | 56 ++++++------------------------ libcxx/include/__config | 2 ++ libcxx/test/configs/legacy.cfg.in | 1 + libcxx/utils/libcxx/test/config.py | 19 ++-------- libcxxabi/src/CMakeLists.txt | 6 ++-- 7 files changed, 27 insertions(+), 68 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 015a359bfb487..bdfecf6c0c607 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -31,6 +31,7 @@ set(CMAKE_MODULE_PATH set(LIBCXX_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LIBCXX_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(LIBCXX_BINARY_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++build") +set(LIBCXX_GENERATED_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++/v1") if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUILD) project(libcxx CXX C) diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst index d42becfd5c3d8..ec017e23b147a 100644 --- a/libcxx/docs/TestingLibcxx.rst +++ b/libcxx/docs/TestingLibcxx.rst @@ -26,8 +26,8 @@ Usage After building libc++, you can run parts of the libc++ test suite by simply running ``llvm-lit`` on a specified test or directory. 
If you're unsure -whether the required libraries have been built, you can use the -`check-cxx-deps` target. For example: +whether the required targets have been built, you can use the `check-cxx-deps` +target to build them. For example: .. code-block:: bash @@ -37,6 +37,12 @@ whether the required libraries have been built, you can use the $ /bin/llvm-lit -sv libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp # Run a single test $ /bin/llvm-lit -sv libcxx/test/std/atomics libcxx/test/std/threads # Test std::thread and std::atomic +In the default configuration, the tests are built against headers that form a +fake installation root of libc++. This installation root has to be updated when +changes are made to the headers, so you should re-run the `check-cxx-deps` target +before running the tests manually with `lit` when you make any sort of change, +including to the headers. + Sometimes you'll want to change the way LIT is running the tests. Custom options can be specified using the `--param==` flag. The most common option you'll want to change is the standard dialect (ie -std=c++XX). By default the diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 7c97db41bb73a..a8d6f74ea38f0 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -2,6 +2,7 @@ set(files __bit_reference __bsd_locale_defaults.h __bsd_locale_fallbacks.h + __config __errc __debug __functional_03 @@ -184,62 +185,28 @@ if(LIBCXX_INSTALL_SUPPORT_HEADERS) ) endif() -configure_file("__config_site.in" - "${LIBCXX_BINARY_DIR}/__config_site" - @ONLY) - -# Generate a custom __config header. The new header is created -# by prepending __config_site to the current __config header. 
-add_custom_command(OUTPUT ${LIBCXX_BINARY_DIR}/__generated_config - COMMAND ${Python3_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/cat_files.py - ${LIBCXX_BINARY_DIR}/__config_site - ${LIBCXX_SOURCE_DIR}/include/__config - -o ${LIBCXX_BINARY_DIR}/__generated_config - DEPENDS ${LIBCXX_SOURCE_DIR}/include/__config - ${LIBCXX_BINARY_DIR}/__config_site -) -# Add a target that executes the generation commands. -add_custom_target(cxx-generated-config ALL - DEPENDS ${LIBCXX_BINARY_DIR}/__generated_config) - if(LIBCXX_HEADER_DIR) - set(output_dir ${LIBCXX_HEADER_DIR}/include/c++/v1) + configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site" @ONLY) - set(out_files) + set(_all_includes "${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site") foreach(f ${files}) - set(src ${CMAKE_CURRENT_SOURCE_DIR}/${f}) - set(dst ${output_dir}/${f}) + set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}") + set(dst "${LIBCXX_GENERATED_INCLUDE_DIR}/${f}") add_custom_command(OUTPUT ${dst} DEPENDS ${src} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} COMMENT "Copying CXX header ${f}") - list(APPEND out_files ${dst}) + list(APPEND _all_includes "${dst}") endforeach() - - # Copy the generated header as __config into build directory. 
- set(src ${LIBCXX_BINARY_DIR}/__generated_config) - set(dst ${output_dir}/__config) - add_custom_command(OUTPUT ${dst} - DEPENDS ${src} cxx-generated-config - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} - COMMENT "Copying CXX __config") - list(APPEND out_files ${dst}) - add_custom_target(generate-cxx-headers DEPENDS ${out_files}) + add_custom_target(generate-cxx-headers DEPENDS ${_all_includes}) add_library(cxx-headers INTERFACE) add_dependencies(cxx-headers generate-cxx-headers ${LIBCXX_CXX_ABI_HEADER_TARGET}) # TODO: Use target_include_directories once we figure out why that breaks the runtimes build if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(cxx-headers INTERFACE /I "${output_dir}") - else() - target_compile_options(cxx-headers INTERFACE -I "${output_dir}") - endif() - - # Make sure the generated __config_site header is included when we build the library. - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(cxx-headers INTERFACE /FI "${LIBCXX_BINARY_DIR}/__config_site") + target_compile_options(cxx-headers INTERFACE /I "${LIBCXX_GENERATED_INCLUDE_DIR}") else() - target_compile_options(cxx-headers INTERFACE -include "${LIBCXX_BINARY_DIR}/__config_site") + target_compile_options(cxx-headers INTERFACE -I "${LIBCXX_GENERATED_INCLUDE_DIR}") endif() else() add_library(cxx-headers INTERFACE) @@ -255,11 +222,10 @@ if (LIBCXX_INSTALL_HEADERS) ) endforeach() - # Install the generated header as __config. - install(FILES ${LIBCXX_BINARY_DIR}/__generated_config + # Install the generated __config_site. 
+ install(FILES ${LIBCXX_BINARY_DIR}/__config_site DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1 PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ - RENAME __config COMPONENT cxx-headers) if (NOT CMAKE_CONFIGURATION_TYPES) diff --git a/libcxx/include/__config b/libcxx/include/__config index 8d2ab2e51b115..d99e972004b86 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -10,6 +10,8 @@ #ifndef _LIBCPP_CONFIG #define _LIBCPP_CONFIG +#include <__config_site> + #if defined(_MSC_VER) && !defined(__clang__) # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index f0a4e8a73e09a..148ac3fc5a899 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -3,6 +3,7 @@ import os import site +config.cxx_headers = "@LIBCXX_GENERATED_INCLUDE_DIR@" config.cxx_under_test = "@CMAKE_CXX_COMPILER@" config.project_obj_root = "@CMAKE_BINARY_DIR@" config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 0d21aa17afd27..c5050d1b6b9a7 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -331,7 +331,6 @@ def configure_default_compile_flags(self): def configure_compile_flags_header_includes(self): support_path = os.path.join(self.libcxx_src_root, 'test', 'support') - self.configure_config_site_header() if self.cxx_stdlib_under_test != 'libstdc++' and \ not self.target_info.is_windows(): self.cxx.compile_flags += [ @@ -348,16 +347,12 @@ def configure_compile_flags_header_includes(self): 'set_windows_crt_report_mode.h') ] cxx_headers = self.get_lit_conf('cxx_headers') - if cxx_headers == '' or (cxx_headers is None - and self.cxx_stdlib_under_test != 'libc++'): + if cxx_headers is None and self.cxx_stdlib_under_test != 'libc++': self.lit_config.note('using the system cxx headers') 
return self.cxx.compile_flags += ['-nostdinc++'] - if cxx_headers is None: - cxx_headers = os.path.join(self.libcxx_src_root, 'include') if not os.path.isdir(cxx_headers): - self.lit_config.fatal("cxx_headers='%s' is not a directory." - % cxx_headers) + self.lit_config.fatal("cxx_headers='{}' is not a directory.".format(cxx_headers)) self.cxx.compile_flags += ['-I' + cxx_headers] if self.libcxx_obj_root is not None: cxxabi_headers = os.path.join(self.libcxx_obj_root, 'include', @@ -365,16 +360,6 @@ def configure_compile_flags_header_includes(self): if os.path.isdir(cxxabi_headers): self.cxx.compile_flags += ['-I' + cxxabi_headers] - def configure_config_site_header(self): - # Check for a possible __config_site in the build directory. We - # use this if it exists. - if self.libcxx_obj_root is None: - return - config_site_header = os.path.join(self.libcxx_obj_root, '__config_site') - if not os.path.isfile(config_site_header): - return - self.cxx.compile_flags += ['-include', config_site_header] - def configure_link_flags(self): # Configure library path self.configure_link_flags_cxx_library_path() diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index e9e454082a054..8d4d84c225e23 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -55,8 +55,6 @@ if (MSVC_IDE OR XCODE) endif() endif() -include_directories("${LIBCXXABI_LIBCXX_INCLUDES}") - if (LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) add_definitions(-DHAVE___CXA_THREAD_ATEXIT_IMPL) endif() @@ -168,7 +166,7 @@ if (LIBCXXABI_ENABLE_SHARED) if(COMMAND llvm_setup_rpath) llvm_setup_rpath(cxxabi_shared) endif() - target_link_libraries(cxxabi_shared PRIVATE ${LIBCXXABI_SHARED_LIBRARIES} ${LIBCXXABI_LIBRARIES}) + target_link_libraries(cxxabi_shared PRIVATE cxx-headers ${LIBCXXABI_SHARED_LIBRARIES} ${LIBCXXABI_LIBRARIES}) if (TARGET pstl::ParallelSTL) target_link_libraries(cxxabi_shared PUBLIC pstl::ParallelSTL) endif() @@ -233,7 +231,7 @@ endif() # Build the static library. 
if (LIBCXXABI_ENABLE_STATIC) add_library(cxxabi_static STATIC ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) - target_link_libraries(cxxabi_static PRIVATE ${LIBCXXABI_STATIC_LIBRARIES} ${LIBCXXABI_LIBRARIES}) + target_link_libraries(cxxabi_static PRIVATE cxx-headers ${LIBCXXABI_STATIC_LIBRARIES} ${LIBCXXABI_LIBRARIES}) if (TARGET pstl::ParallelSTL) target_link_libraries(cxxabi_static PUBLIC pstl::ParallelSTL) endif() From 01ea93d85d6e2240d70daf52e84772aba310bc19 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 21 Oct 2020 08:25:09 -0400 Subject: [PATCH 043/179] [CostModel] remove cost-kind predicate for memcpy cost The default implementation base returns TCC_Expensive (currently set to '4'), so that explains the test diff. This probably does not make sense for most callers, but at least now the costs will be consistently wrong instead of mysteriously wrong. The ARM target has an override that tries to model codegen expansion, and that should likely be adapted for general usage. This probably does not affect anything because the vectorizers are the primary users of the throughput cost, but memcpy is not listed as a trivially vectorizable intrinsic. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 +---- llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index abcd612c7b553..2eec38bbcc5d9 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1166,10 +1166,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { break; case Intrinsic::memcpy: - // FIXME: all cost kinds should default to the same thing? 
- if (CostKind != TTI::TCK_RecipThroughput) - return thisT()->getMemcpyCost(ICA.getInst()); - return BaseT::getIntrinsicInstrCost(ICA, CostKind); + return thisT()->getMemcpyCost(ICA.getInst()); case Intrinsic::masked_scatter: { // FIXME: all cost kinds should default to the same thing? diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index c76585a51ef36..d3bf703513ebd 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -226,7 +226,7 @@ define void @reduce_fmax(<16 x float> %va) { define void @memcpy(i8* %a, i8* %b, i32 %c) { ; THRU-LABEL: 'memcpy' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'memcpy' From eb60c48744f4edbdc72a44250c84796f3bf5f262 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 21 Oct 2020 09:17:30 -0400 Subject: [PATCH 044/179] [libc++] Revert "Include <__config_site> from <__config>" This temporarily reverts commit 1e46d1aa until I find a solution to fix the libc++abi and libunwind test suites with that change. 
--- libcxx/CMakeLists.txt | 1 - libcxx/docs/TestingLibcxx.rst | 10 ++---- libcxx/include/CMakeLists.txt | 56 ++++++++++++++++++++++++------ libcxx/include/__config | 2 -- libcxx/test/configs/legacy.cfg.in | 1 - libcxx/utils/libcxx/test/config.py | 19 ++++++++-- libcxxabi/src/CMakeLists.txt | 6 ++-- 7 files changed, 68 insertions(+), 27 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index bdfecf6c0c607..015a359bfb487 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -31,7 +31,6 @@ set(CMAKE_MODULE_PATH set(LIBCXX_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LIBCXX_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(LIBCXX_BINARY_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++build") -set(LIBCXX_GENERATED_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++/v1") if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUILD) project(libcxx CXX C) diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst index ec017e23b147a..d42becfd5c3d8 100644 --- a/libcxx/docs/TestingLibcxx.rst +++ b/libcxx/docs/TestingLibcxx.rst @@ -26,8 +26,8 @@ Usage After building libc++, you can run parts of the libc++ test suite by simply running ``llvm-lit`` on a specified test or directory. If you're unsure -whether the required targets have been built, you can use the `check-cxx-deps` -target to build them. For example: +whether the required libraries have been built, you can use the +`check-cxx-deps` target. For example: .. code-block:: bash @@ -37,12 +37,6 @@ target to build them. For example: $ /bin/llvm-lit -sv libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp # Run a single test $ /bin/llvm-lit -sv libcxx/test/std/atomics libcxx/test/std/threads # Test std::thread and std::atomic -In the default configuration, the tests are built against headers that form a -fake installation root of libc++. 
This installation root has to be updated when -changes are made to the headers, so you should re-run the `check-cxx-deps` target -before running the tests manually with `lit` when you make any sort of change, -including to the headers. - Sometimes you'll want to change the way LIT is running the tests. Custom options can be specified using the `--param==` flag. The most common option you'll want to change is the standard dialect (ie -std=c++XX). By default the diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index a8d6f74ea38f0..7c97db41bb73a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -2,7 +2,6 @@ set(files __bit_reference __bsd_locale_defaults.h __bsd_locale_fallbacks.h - __config __errc __debug __functional_03 @@ -185,28 +184,62 @@ if(LIBCXX_INSTALL_SUPPORT_HEADERS) ) endif() +configure_file("__config_site.in" + "${LIBCXX_BINARY_DIR}/__config_site" + @ONLY) + +# Generate a custom __config header. The new header is created +# by prepending __config_site to the current __config header. +add_custom_command(OUTPUT ${LIBCXX_BINARY_DIR}/__generated_config + COMMAND ${Python3_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/cat_files.py + ${LIBCXX_BINARY_DIR}/__config_site + ${LIBCXX_SOURCE_DIR}/include/__config + -o ${LIBCXX_BINARY_DIR}/__generated_config + DEPENDS ${LIBCXX_SOURCE_DIR}/include/__config + ${LIBCXX_BINARY_DIR}/__config_site +) +# Add a target that executes the generation commands. 
+add_custom_target(cxx-generated-config ALL + DEPENDS ${LIBCXX_BINARY_DIR}/__generated_config) + if(LIBCXX_HEADER_DIR) - configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site" @ONLY) + set(output_dir ${LIBCXX_HEADER_DIR}/include/c++/v1) - set(_all_includes "${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site") + set(out_files) foreach(f ${files}) - set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}") - set(dst "${LIBCXX_GENERATED_INCLUDE_DIR}/${f}") + set(src ${CMAKE_CURRENT_SOURCE_DIR}/${f}) + set(dst ${output_dir}/${f}) add_custom_command(OUTPUT ${dst} DEPENDS ${src} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} COMMENT "Copying CXX header ${f}") - list(APPEND _all_includes "${dst}") + list(APPEND out_files ${dst}) endforeach() - add_custom_target(generate-cxx-headers DEPENDS ${_all_includes}) + + # Copy the generated header as __config into build directory. + set(src ${LIBCXX_BINARY_DIR}/__generated_config) + set(dst ${output_dir}/__config) + add_custom_command(OUTPUT ${dst} + DEPENDS ${src} cxx-generated-config + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} + COMMENT "Copying CXX __config") + list(APPEND out_files ${dst}) + add_custom_target(generate-cxx-headers DEPENDS ${out_files}) add_library(cxx-headers INTERFACE) add_dependencies(cxx-headers generate-cxx-headers ${LIBCXX_CXX_ABI_HEADER_TARGET}) # TODO: Use target_include_directories once we figure out why that breaks the runtimes build if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(cxx-headers INTERFACE /I "${LIBCXX_GENERATED_INCLUDE_DIR}") + target_compile_options(cxx-headers INTERFACE /I "${output_dir}") + else() + target_compile_options(cxx-headers INTERFACE -I "${output_dir}") + endif() + + # Make sure the generated __config_site header is included when we build the library. 
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") + target_compile_options(cxx-headers INTERFACE /FI "${LIBCXX_BINARY_DIR}/__config_site") else() - target_compile_options(cxx-headers INTERFACE -I "${LIBCXX_GENERATED_INCLUDE_DIR}") + target_compile_options(cxx-headers INTERFACE -include "${LIBCXX_BINARY_DIR}/__config_site") endif() else() add_library(cxx-headers INTERFACE) @@ -222,10 +255,11 @@ if (LIBCXX_INSTALL_HEADERS) ) endforeach() - # Install the generated __config_site. - install(FILES ${LIBCXX_BINARY_DIR}/__config_site + # Install the generated header as __config. + install(FILES ${LIBCXX_BINARY_DIR}/__generated_config DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1 PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ + RENAME __config COMPONENT cxx-headers) if (NOT CMAKE_CONFIGURATION_TYPES) diff --git a/libcxx/include/__config b/libcxx/include/__config index d99e972004b86..8d2ab2e51b115 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -10,8 +10,6 @@ #ifndef _LIBCPP_CONFIG #define _LIBCPP_CONFIG -#include <__config_site> - #if defined(_MSC_VER) && !defined(__clang__) # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index 148ac3fc5a899..f0a4e8a73e09a 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -3,7 +3,6 @@ import os import site -config.cxx_headers = "@LIBCXX_GENERATED_INCLUDE_DIR@" config.cxx_under_test = "@CMAKE_CXX_COMPILER@" config.project_obj_root = "@CMAKE_BINARY_DIR@" config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index c5050d1b6b9a7..0d21aa17afd27 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -331,6 +331,7 @@ def configure_default_compile_flags(self): def 
configure_compile_flags_header_includes(self): support_path = os.path.join(self.libcxx_src_root, 'test', 'support') + self.configure_config_site_header() if self.cxx_stdlib_under_test != 'libstdc++' and \ not self.target_info.is_windows(): self.cxx.compile_flags += [ @@ -347,12 +348,16 @@ def configure_compile_flags_header_includes(self): 'set_windows_crt_report_mode.h') ] cxx_headers = self.get_lit_conf('cxx_headers') - if cxx_headers is None and self.cxx_stdlib_under_test != 'libc++': + if cxx_headers == '' or (cxx_headers is None + and self.cxx_stdlib_under_test != 'libc++'): self.lit_config.note('using the system cxx headers') return self.cxx.compile_flags += ['-nostdinc++'] + if cxx_headers is None: + cxx_headers = os.path.join(self.libcxx_src_root, 'include') if not os.path.isdir(cxx_headers): - self.lit_config.fatal("cxx_headers='{}' is not a directory.".format(cxx_headers)) + self.lit_config.fatal("cxx_headers='%s' is not a directory." + % cxx_headers) self.cxx.compile_flags += ['-I' + cxx_headers] if self.libcxx_obj_root is not None: cxxabi_headers = os.path.join(self.libcxx_obj_root, 'include', @@ -360,6 +365,16 @@ def configure_compile_flags_header_includes(self): if os.path.isdir(cxxabi_headers): self.cxx.compile_flags += ['-I' + cxxabi_headers] + def configure_config_site_header(self): + # Check for a possible __config_site in the build directory. We + # use this if it exists. 
+ if self.libcxx_obj_root is None: + return + config_site_header = os.path.join(self.libcxx_obj_root, '__config_site') + if not os.path.isfile(config_site_header): + return + self.cxx.compile_flags += ['-include', config_site_header] + def configure_link_flags(self): # Configure library path self.configure_link_flags_cxx_library_path() diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 8d4d84c225e23..e9e454082a054 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -55,6 +55,8 @@ if (MSVC_IDE OR XCODE) endif() endif() +include_directories("${LIBCXXABI_LIBCXX_INCLUDES}") + if (LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) add_definitions(-DHAVE___CXA_THREAD_ATEXIT_IMPL) endif() @@ -166,7 +168,7 @@ if (LIBCXXABI_ENABLE_SHARED) if(COMMAND llvm_setup_rpath) llvm_setup_rpath(cxxabi_shared) endif() - target_link_libraries(cxxabi_shared PRIVATE cxx-headers ${LIBCXXABI_SHARED_LIBRARIES} ${LIBCXXABI_LIBRARIES}) + target_link_libraries(cxxabi_shared PRIVATE ${LIBCXXABI_SHARED_LIBRARIES} ${LIBCXXABI_LIBRARIES}) if (TARGET pstl::ParallelSTL) target_link_libraries(cxxabi_shared PUBLIC pstl::ParallelSTL) endif() @@ -231,7 +233,7 @@ endif() # Build the static library. if (LIBCXXABI_ENABLE_STATIC) add_library(cxxabi_static STATIC ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) - target_link_libraries(cxxabi_static PRIVATE cxx-headers ${LIBCXXABI_STATIC_LIBRARIES} ${LIBCXXABI_LIBRARIES}) + target_link_libraries(cxxabi_static PRIVATE ${LIBCXXABI_STATIC_LIBRARIES} ${LIBCXXABI_LIBRARIES}) if (TARGET pstl::ParallelSTL) target_link_libraries(cxxabi_static PUBLIC pstl::ParallelSTL) endif() From 1bcec29afb321976cdcaa632ee6a47567dd651a7 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 21 Oct 2020 09:28:50 -0400 Subject: [PATCH 045/179] Only run when `arm` is registered. NFC. 
--- clang/test/Driver/arm-float-abi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Driver/arm-float-abi.c b/clang/test/Driver/arm-float-abi.c index 74ba3fd3bc579..294f024447695 100644 --- a/clang/test/Driver/arm-float-abi.c +++ b/clang/test/Driver/arm-float-abi.c @@ -1,3 +1,4 @@ +// REQUIRES: arm-registered-target // RUN: not %clang %s -target armv7-apple-ios -mfloat-abi=hard 2>&1 | FileCheck -check-prefix=ARMV7-ERROR %s // RUN: %clang %s -target armv7-apple-ios -mfloat-abi=softfp -### 2>&1 | FileCheck -check-prefix=NOERROR %s // RUN: %clang %s -arch armv7 -target thumbv7-apple-darwin-eabi -mfloat-abi=hard -### 2>&1 | FileCheck -check-prefix=NOERROR %s From 537f0fbe82049b8d5b6c700ecc4ab166c350b0c6 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 21 Oct 2020 14:28:28 +0100 Subject: [PATCH 046/179] [DebugInfo] Follow up c521e44defb5 with an API improvement As mentioned post-commit in D85749, the 'substituteDebugValuesForInst' method added in c521e44defb5 would be better off with a limit on the number of operands to substitute. This handles the common case of "substitute the first operand between these two differing instructions", or possibly up to N first operands. --- llvm/include/llvm/CodeGen/MachineFunction.h | 6 +++++- llvm/lib/CodeGen/MachineFunction.cpp | 9 +++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 01f59045ecb4a..e9979c788ce0b 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -458,7 +458,11 @@ class MachineFunction { /// which has the same signature (i.e., def operands in the same place) but /// a modified instruction type, flags, or otherwise. An example: X86 moves /// are sometimes transformed into equivalent LEAs. 
- void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New); + /// If the two instructions are not the same opcode, limit which operands to + /// examine for substitutions to the first N operands by setting + /// \p MaxOperand. + void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New, + unsigned MaxOperand = UINT_MAX); MachineFunction(Function &F, const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 2568448f3c92c..a7edc274dd237 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -955,7 +955,8 @@ void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A, } void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, - MachineInstr &New) { + MachineInstr &New, + unsigned MaxOperand) { // If the Old instruction wasn't tracked at all, there is no work to do. unsigned OldInstrNum = Old.peekDebugInstrNum(); if (!OldInstrNum) @@ -965,12 +966,16 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, // Avoid creating new instr numbers unless we create a new substitution. // While this has no functional effect, it risks confusing someone reading // MIR output. + // Examine all the operands, or the first N specified by the caller. 
+ MaxOperand = std::min(MaxOperand, Old.getNumOperands()); for (unsigned int I = 0; I < Old.getNumOperands(); ++I) { const auto &OldMO = Old.getOperand(I); + auto &NewMO = New.getOperand(I); + (void)NewMO; if (!OldMO.isReg() || !OldMO.isDef()) continue; - assert(Old.getOperand(I).isDef()); + assert(NewMO.isDef()); unsigned NewInstrNum = New.getDebugInstrNum(); makeDebugValueSubstitution(std::make_pair(OldInstrNum, I), From 7bf066a20f4bfd52a79ae7650632bb3925171104 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Wed, 21 Oct 2020 15:49:53 +0200 Subject: [PATCH 047/179] [nfc] [lldb] Fix harmless slicing of DWARFDIE Differential Revision: https://reviews.llvm.org/D89875 --- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index abe16182ef620..d6009518da127 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -166,7 +166,7 @@ class DWARFUnit : public lldb_private::UserID { void SetBaseAddress(dw_addr_t base_addr); - DWARFBaseDIE GetUnitDIEOnly() { return DWARFDIE(this, GetUnitDIEPtrOnly()); } + DWARFBaseDIE GetUnitDIEOnly() { return {this, GetUnitDIEPtrOnly()}; } DWARFDIE DIE() { return DWARFDIE(this, DIEPtr()); } From dfd6b69e018c88c752e89b504c427718d99e6c8f Mon Sep 17 00:00:00 2001 From: "Paul C. 
Anagnostopoulos" Date: Tue, 20 Oct 2020 16:41:56 -0400 Subject: [PATCH 048/179] [ARM] [TableGen] Clean up !if(!eq(boolean, 1) and related booleans Differential Revision: https://reviews.llvm.org/D89822 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index f7f403503dc7f..67a3530a7d4e7 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2474,7 +2474,7 @@ multiclass MVE_VABSNEG_int_m; } @@ -4777,7 +4777,7 @@ class MVE_VxMOVxN; - if !eq(top, 0) then { + if !not(top) then { // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd // lanes of a with the odd lanes of b. In other words, the lanes we're // _keeping_ from a are the even ones. So we can flip it round and say that From 0784e17f1b4ac6b613ebf1fb1fb9b0dc9d0776ec Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 21 Oct 2020 16:09:07 +0200 Subject: [PATCH 049/179] Remove .svn from exclude list as we moved to git Reviewed By: emaste Differential Revision: https://reviews.llvm.org/D89859 --- clang/CMakeLists.txt | 1 - clang/tools/libclang/CMakeLists.txt | 1 - lld/CMakeLists.txt | 1 - lldb/cmake/modules/LLDBConfig.cmake | 2 -- llvm/CMakeLists.txt | 2 -- llvm/cmake/modules/CMakeLists.txt | 2 -- polly/CMakeLists.txt | 2 -- polly/lib/External/CMakeLists.txt | 1 - 8 files changed, 12 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 900ef0a4d7372..a2b99e2e37e96 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -446,7 +446,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) PATTERN "*.def" PATTERN "*.h" PATTERN "config.h" EXCLUDE - PATTERN ".svn" EXCLUDE ) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/clang diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index 15f7ff94dfead..51ff2e7e15659 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ 
b/clang/tools/libclang/CMakeLists.txt @@ -174,7 +174,6 @@ install(DIRECTORY ../../include/clang-c DESTINATION "${LIBCLANG_HEADERS_INSTALL_DESTINATION}" FILES_MATCHING PATTERN "*.h" - PATTERN ".svn" EXCLUDE ) # LLVM_DISTRIBUTION_COMPONENTS requires that each component have both a diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 8b8c7178c616c..82b4b9b9b1981 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -195,7 +195,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) DESTINATION include FILES_MATCHING PATTERN "*.h" - PATTERN ".svn" EXCLUDE ) endif() diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 5fbc89892c73c..2fdf1502d0559 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -228,7 +228,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) DESTINATION include FILES_MATCHING PATTERN "*.h" - PATTERN ".svn" EXCLUDE PATTERN ".cmake" EXCLUDE ) @@ -237,7 +236,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) DESTINATION include FILES_MATCHING PATTERN "*.h" - PATTERN ".svn" EXCLUDE PATTERN ".cmake" EXCLUDE ) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index a7ac346ae3643..344ccb6fda2f0 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1104,7 +1104,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) PATTERN "*.td" PATTERN "*.inc" PATTERN "LICENSE.TXT" - PATTERN ".svn" EXCLUDE ) install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm ${LLVM_INCLUDE_DIR}/llvm-c @@ -1118,7 +1117,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) # Exclude include/llvm/CMakeFiles/intrinsics_gen.dir, matched by "*.def" PATTERN "CMakeFiles" EXCLUDE PATTERN "config.h" EXCLUDE - PATTERN ".svn" EXCLUDE ) if (LLVM_INSTALL_MODULEMAPS) diff --git a/llvm/cmake/modules/CMakeLists.txt b/llvm/cmake/modules/CMakeLists.txt index 4b8879f65fe47..505dc9a29d700 100644 --- a/llvm/cmake/modules/CMakeLists.txt +++ b/llvm/cmake/modules/CMakeLists.txt @@ -93,7 +93,6 @@ set(llvm_config_include_buildtree_only_exports) file(COPY . 
DESTINATION ${llvm_cmake_builddir} FILES_MATCHING PATTERN *.cmake - PATTERN .svn EXCLUDE PATTERN CMakeFiles EXCLUDE ) @@ -152,7 +151,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) DESTINATION ${LLVM_INSTALL_PACKAGE_DIR} COMPONENT cmake-exports FILES_MATCHING PATTERN *.cmake - PATTERN .svn EXCLUDE PATTERN LLVMConfig.cmake EXCLUDE PATTERN LLVMConfigExtensions.cmake EXCLUDE PATTERN LLVMConfigVersion.cmake EXCLUDE diff --git a/polly/CMakeLists.txt b/polly/CMakeLists.txt index fe7f6b78b4792..ca7c04c565bb6 100644 --- a/polly/CMakeLists.txt +++ b/polly/CMakeLists.txt @@ -125,7 +125,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) DESTINATION include FILES_MATCHING PATTERN "*.h" - PATTERN ".svn" EXCLUDE ) install(DIRECTORY ${POLLY_BINARY_DIR}/include/ @@ -133,7 +132,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) FILES_MATCHING PATTERN "*.h" PATTERN "CMakeFiles" EXCLUDE - PATTERN ".svn" EXCLUDE ) endif() diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt index c953ea48475d7..8991094d92c7b 100644 --- a/polly/lib/External/CMakeLists.txt +++ b/polly/lib/External/CMakeLists.txt @@ -279,7 +279,6 @@ if (POLLY_BUNDLED_ISL) FILES_MATCHING PATTERN "*.h" PATTERN "CMakeFiles" EXCLUDE - PATTERN ".svn" EXCLUDE ) endif() From fa5fa63fd140f2d4bad0357839378606a583b32c Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 15 Oct 2020 17:22:33 +0200 Subject: [PATCH 050/179] [lldb] Port lldb gdb-server to libOption The existing help text was very terse and was missing several important options. In the new version, I add a short description of each option and a slightly longer description of the tool as a whole. The new option list does not include undocumented no-op options: --debug and --verbose. It also does not include undocumented short aliases for long options, with two exceptions: -h, because it's well-known; and -S (--setsid), as it's used in one test. Using these options will now produce an error. 
I believe that is acceptable as users aren't generally invoking lldb-server directly, and the only way to learn about the short aliases was by looking at the source. Differential Revision: https://reviews.llvm.org/D89477 --- lldb/include/lldb/Utility/Args.h | 1 + lldb/source/Utility/Args.cpp | 5 + .../Shell/lldb-server/TestErrorMessages.test | 14 + lldb/tools/lldb-server/CMakeLists.txt | 10 + lldb/tools/lldb-server/LLGSOptions.td | 62 ++++ lldb/tools/lldb-server/lldb-gdbserver.cpp | 288 ++++++++---------- 6 files changed, 225 insertions(+), 155 deletions(-) create mode 100644 lldb/test/Shell/lldb-server/TestErrorMessages.test create mode 100644 lldb/tools/lldb-server/LLGSOptions.td diff --git a/lldb/include/lldb/Utility/Args.h b/lldb/include/lldb/Utility/Args.h index 2cce7d0c697c7..82e6d147ae566 100644 --- a/lldb/include/lldb/Utility/Args.h +++ b/lldb/include/lldb/Utility/Args.h @@ -66,6 +66,7 @@ class Args { Args(const Args &rhs); explicit Args(const StringList &list); + explicit Args(llvm::ArrayRef args); Args &operator=(const Args &rhs); diff --git a/lldb/source/Utility/Args.cpp b/lldb/source/Utility/Args.cpp index 4f3285404b6d2..2cbe727ed2408 100644 --- a/lldb/source/Utility/Args.cpp +++ b/lldb/source/Utility/Args.cpp @@ -175,6 +175,11 @@ Args::Args(const StringList &list) : Args() { AppendArgument(arg); } +Args::Args(llvm::ArrayRef args) : Args() { + for (llvm::StringRef arg : args) + AppendArgument(arg); +} + Args &Args::operator=(const Args &rhs) { Clear(); diff --git a/lldb/test/Shell/lldb-server/TestErrorMessages.test b/lldb/test/Shell/lldb-server/TestErrorMessages.test new file mode 100644 index 0000000000000..ef64ec6e5aba3 --- /dev/null +++ b/lldb/test/Shell/lldb-server/TestErrorMessages.test @@ -0,0 +1,14 @@ +RUN: lldb-server gdbserver --fd 2>&1 | FileCheck --check-prefixes=FD1,ALL %s +FD1: error: --fd: missing argument + +RUN: lldb-server gdbserver --fd three 2>&1 | FileCheck --check-prefixes=FD2,ALL %s +FD2: error: invalid '--fd' argument + +RUN: 
lldb-server gdbserver --bogus 2>&1 | FileCheck --check-prefixes=BOGUS,ALL %s +BOGUS: error: unknown argument '--bogus' + +RUN: lldb-server gdbserver 2>&1 | FileCheck --check-prefixes=CONN,ALL %s +CONN: error: no connection arguments + +ALL: Use '{{.*}} g[dbserver] --help' for a complete list of options. + diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index 6e7b30df5c581..930c327cf072c 100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -1,3 +1,8 @@ +set(LLVM_TARGET_DEFINITIONS LLGSOptions.td) +tablegen(LLVM LLGSOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(LLGSOptionsTableGen) +set_target_properties(LLGSOptionsTableGen PROPERTIES FOLDER "lldb misc") + set(LLDB_PLUGINS) if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") @@ -53,8 +58,13 @@ add_lldb_tool(lldb-server ${LLDB_SYSTEM_LIBS} LINK_COMPONENTS + Option Support ) +add_dependencies(lldb-server + LLGSOptionsTableGen + ${tablegen_deps} +) target_include_directories(lldb-server PRIVATE "${LLDB_SOURCE_DIR}/source") target_link_libraries(lldb-server PRIVATE ${LLDB_SYSTEM_LIBS}) diff --git a/lldb/tools/lldb-server/LLGSOptions.td b/lldb/tools/lldb-server/LLGSOptions.td new file mode 100644 index 0000000000000..429a4671764f3 --- /dev/null +++ b/lldb/tools/lldb-server/LLGSOptions.td @@ -0,0 +1,62 @@ +include "llvm/Option/OptParser.td" + +class F: Flag<["--", "-"], name>; +class R prefixes, string name> + : Option; + +multiclass SJ { + def NAME: Separate<["--", "-"], name>, + HelpText; + def NAME # _eq: Joined<["--", "-"], name # "=">, + Alias(NAME)>; +} + +def grp_connect : OptionGroup<"connection">, HelpText<"CONNECTION">; + +defm fd: SJ<"fd", "Communicate over the given file descriptor.">, + MetaVarName<"">, + Group; + +defm named_pipe: SJ<"named-pipe", "Write port lldb-server will listen on to the given named pipe.">, + MetaVarName<"">, + Group; + +defm pipe: SJ<"pipe", "Write port lldb-server will listen on to the 
given file descriptor.">, + MetaVarName<"">, + Group; + +def reverse_connect: F<"reverse-connect">, + HelpText<"Connect to the client instead of passively waiting for a connection. In this case [host]:port denotes the remote address to connect to.">, + Group; + +def grp_general : OptionGroup<"general options">, HelpText<"GENERAL OPTIONS">; + +defm log_channels: SJ<"log-channels", "Channels to log. A colon-separated list of entries. Each entry starts with a channel followed by a space-separated list of categories.">, + MetaVarName<"">, + Group; + +defm log_file: SJ<"log-file", "Destination file to log to. If empty, log to stderr.">, + MetaVarName<"">, + Group; + +def setsid: F<"setsid">, HelpText<"Run lldb-server in a new session.">, + Group; +def: Flag<["-"], "S">, Alias, + Group; + +def help: F<"help">, HelpText<"Prints out the usage information for lldb-server.">, + Group; +def: Flag<["-"], "h">, Alias, + Group; + +def grp_target : OptionGroup<"target selection">, HelpText<"TARGET SELECTION">; + +defm attach: SJ<"attach", "Attach to the process given by a (numeric) process id or a name.">, + MetaVarName<"">, + Group; + +def REM : R<["--"], "">, HelpText<"Launch program for debugging.">, + MetaVarName<"program args">, + Group; + +def: F<"native-regs">; // Noop. Present for backwards compatibility only. 
diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp index 633e37c3a0435..0fbb13800bf74 100644 --- a/lldb/tools/lldb-server/lldb-gdbserver.cpp +++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp @@ -17,7 +17,6 @@ #include #endif - #include "Acceptor.h" #include "LLDBServerUtilities.h" #include "Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h" @@ -25,8 +24,6 @@ #include "lldb/Host/Config.h" #include "lldb/Host/ConnectionFileDescriptor.h" #include "lldb/Host/FileSystem.h" -#include "lldb/Host/HostGetOpt.h" -#include "lldb/Host/OptionParser.h" #include "lldb/Host/Pipe.h" #include "lldb/Host/Socket.h" #include "lldb/Host/StringConvert.h" @@ -34,7 +31,11 @@ #include "lldb/Target/Process.h" #include "lldb/Utility/Status.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/WithColor.h" #if defined(__linux__) #include "Plugins/Process/Linux/NativeProcessLinux.h" @@ -88,31 +89,6 @@ class NativeProcessFactory : public NativeProcessProtocol::Factory { #endif } -// option descriptors for getopt_long_only() - -static int g_debug = 0; -static int g_verbose = 0; - -static struct option g_long_options[] = { - {"debug", no_argument, &g_debug, 1}, - {"verbose", no_argument, &g_verbose, 1}, - {"log-file", required_argument, nullptr, 'l'}, - {"log-channels", required_argument, nullptr, 'c'}, - {"attach", required_argument, nullptr, 'a'}, - {"named-pipe", required_argument, nullptr, 'N'}, - {"pipe", required_argument, nullptr, 'U'}, - {"native-regs", no_argument, nullptr, - 'r'}, // Specify to use the native registers instead of the gdb defaults - // for the architecture. NOTE: this is a do-nothing arg as it's - // behavior is default now. FIXME remove call from lldb-platform. 
- {"reverse-connect", no_argument, nullptr, - 'R'}, // Specifies that llgs attaches to the client address:port rather - // than llgs listening for a connection from address on port. - {"setsid", no_argument, nullptr, - 'S'}, // Call setsid() to make llgs run in its own session. - {"fd", required_argument, nullptr, 'F'}, - {nullptr, 0, nullptr, 0}}; - #ifndef _WIN32 // Watch for signals static int g_sighup_received_count = 0; @@ -129,20 +105,6 @@ static void sighup_handler(MainLoopBase &mainloop) { } #endif // #ifndef _WIN32 -static void display_usage(const char *progname, const char *subcommand) { - fprintf(stderr, "Usage:\n %s %s " - "[--log-file log-file-name] " - "[--log-channels log-channel-list] " - "[--setsid] " - "[--fd file-descriptor]" - "[--named-pipe named-pipe-path] " - "[--native-regs] " - "[--attach pid] " - "[[HOST]:PORT] " - "[-- PROGRAM ARG1 ARG2 ...]\n", - progname, subcommand); -} - void handle_attach_to_pid(GDBRemoteCommunicationServerLLGS &gdb_server, lldb::pid_t pid) { Status error = gdb_server.AttachToProcess(pid); @@ -176,12 +138,12 @@ void handle_attach(GDBRemoteCommunicationServerLLGS &gdb_server, handle_attach_to_process_name(gdb_server, attach_target); } -void handle_launch(GDBRemoteCommunicationServerLLGS &gdb_server, int argc, - const char *const argv[]) { +void handle_launch(GDBRemoteCommunicationServerLLGS &gdb_server, + llvm::ArrayRef Arguments) { ProcessLaunchInfo info; info.GetFlags().Set(eLaunchFlagStopAtEntry | eLaunchFlagDebug | eLaunchFlagDisableASLR); - info.SetArguments(const_cast(argv), true); + info.SetArguments(Args(Arguments), true); llvm::SmallString<64> cwd; if (std::error_code ec = llvm::sys::fs::current_path(cwd)) { @@ -198,7 +160,7 @@ void handle_launch(GDBRemoteCommunicationServerLLGS &gdb_server, int argc, Status error = gdb_server.LaunchProcess(); if (error.Fail()) { llvm::errs() << llvm::formatv("error: failed to launch '{0}': {1}\n", - argv[0], error); + Arguments[0], error); exit(1); } } @@ -229,7 +191,7 @@ 
Status writeSocketIdToPipe(lldb::pipe_t unnamed_pipe, void ConnectToRemote(MainLoop &mainloop, GDBRemoteCommunicationServerLLGS &gdb_server, - bool reverse_connect, const char *const host_and_port, + bool reverse_connect, llvm::StringRef host_and_port, const char *const progname, const char *const subcommand, const char *const named_pipe_path, pipe_t unnamed_pipe, int connection_fd) { @@ -258,7 +220,7 @@ void ConnectToRemote(MainLoop &mainloop, connection_url, error.AsCString()); exit(-1); } - } else if (host_and_port && host_and_port[0]) { + } else if (!host_and_port.empty()) { // Parse out host and port. std::string final_host_and_port; std::string connection_host; @@ -269,7 +231,7 @@ void ConnectToRemote(MainLoop &mainloop, // expect the remainder to be the port. if (host_and_port[0] == ':') final_host_and_port.append("localhost"); - final_host_and_port.append(host_and_port); + final_host_and_port.append(host_and_port.str()); // Note: use rfind, because the host/port may look like "[::1]:12345". const std::string::size_type colon_pos = final_host_and_port.rfind(':'); @@ -361,7 +323,57 @@ void ConnectToRemote(MainLoop &mainloop, printf("Connection established.\n"); } -// main +namespace { +enum ID { + OPT_INVALID = 0, // This is not an option ID. 
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OPT_##ID, +#include "LLGSOptions.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "LLGSOptions.inc" +#undef PREFIX + +const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "LLGSOptions.inc" +#undef OPTION +}; + +class LLGSOptTable : public opt::OptTable { +public: + LLGSOptTable() : OptTable(InfoTable) {} + + void PrintHelp(llvm::StringRef Name) { + std::string Usage = + (Name + " [options] [[host]:port] [[--] program args...]").str(); + OptTable::PrintHelp(llvm::outs(), Usage.c_str(), "lldb-server"); + llvm::outs() << R"( +DESCRIPTION + lldb-server connects to the LLDB client, which drives the debugging session. + If no connection options are given, the [host]:port argument must be present + and will denote the address that lldb-server will listen on. [host] defaults + to "localhost" if empty. Port can be zero, in which case the port number will + be chosen dynamically and written to destinations given by --named-pipe and + --pipe arguments. + + If no target is selected at startup, lldb-server can be directed by the LLDB + client to launch or attach to a process. 
+ +)"; + } +}; +} // namespace + int main_gdbserver(int argc, char *argv[]) { Status error; MainLoop mainloop; @@ -374,10 +386,6 @@ int main_gdbserver(int argc, char *argv[]) { const char *progname = argv[0]; const char *subcommand = argv[1]; - argc--; - argv++; - int long_option_index = 0; - int ch; std::string attach_target; std::string named_pipe_path; std::string log_file; @@ -390,94 +398,69 @@ int main_gdbserver(int argc, char *argv[]) { // ProcessLaunchInfo launch_info; ProcessAttachInfo attach_info; - bool show_usage = false; - int option_error = 0; -#if __GLIBC__ - optind = 0; -#else - optreset = 1; - optind = 1; -#endif - - std::string short_options(OptionParser::GetShortOptionString(g_long_options)); - - while ((ch = getopt_long_only(argc, argv, short_options.c_str(), - g_long_options, &long_option_index)) != -1) { - switch (ch) { - case 0: // Any optional that auto set themselves will return 0 - break; - - case 'l': // Set Log File - if (optarg && optarg[0]) - log_file.assign(optarg); - break; - - case 'c': // Log Channels - if (optarg && optarg[0]) - log_channels = StringRef(optarg); - break; - - case 'N': // named pipe - if (optarg && optarg[0]) - named_pipe_path = optarg; - break; - - case 'U': // unnamed pipe - if (optarg && optarg[0]) - unnamed_pipe = (pipe_t)StringConvert::ToUInt64(optarg, -1); - break; - - case 'r': - // Do nothing, native regs is the default these days - break; - - case 'R': - reverse_connect = true; - break; + LLGSOptTable Opts; + llvm::BumpPtrAllocator Alloc; + llvm::StringSaver Saver(Alloc); + bool HasError = false; + opt::InputArgList Args = Opts.parseArgs(argc - 1, argv + 1, OPT_UNKNOWN, + Saver, [&](llvm::StringRef Msg) { + WithColor::error() << Msg << "\n"; + HasError = true; + }); + std::string Name = + (llvm::sys::path::filename(argv[0]) + " g[dbserver]").str(); + std::string HelpText = + "Use '" + Name + " --help' for a complete list of options.\n"; + if (HasError) { + llvm::errs() << HelpText; + return 1; + } - case 
'F': - connection_fd = StringConvert::ToUInt32(optarg, -1); - break; + if (Args.hasArg(OPT_help)) { + Opts.PrintHelp(Name); + return 0; + } #ifndef _WIN32 - case 'S': - // Put llgs into a new session. Terminals group processes - // into sessions and when a special terminal key sequences - // (like control+c) are typed they can cause signals to go out to - // all processes in a session. Using this --setsid (-S) option - // will cause debugserver to run in its own sessions and be free - // from such issues. - // - // This is useful when llgs is spawned from a command - // line application that uses llgs to do the debugging, - // yet that application doesn't want llgs receiving the - // signals sent to the session (i.e. dying when anyone hits ^C). - { - const ::pid_t new_sid = setsid(); - if (new_sid == -1) { - llvm::errs() << llvm::formatv( - "failed to set new session id for {0} ({1})\n", LLGS_PROGRAM_NAME, - llvm::sys::StrError()); - } + if (Args.hasArg(OPT_setsid)) { + // Put llgs into a new session. Terminals group processes + // into sessions and when a special terminal key sequences + // (like control+c) are typed they can cause signals to go out to + // all processes in a session. Using this --setsid (-S) option + // will cause debugserver to run in its own sessions and be free + // from such issues. + // + // This is useful when llgs is spawned from a command + // line application that uses llgs to do the debugging, + // yet that application doesn't want llgs receiving the + // signals sent to the session (i.e. dying when anyone hits ^C). 
+ { + const ::pid_t new_sid = setsid(); + if (new_sid == -1) { + WithColor::warning() + << llvm::formatv("failed to set new session id for {0} ({1})\n", + LLGS_PROGRAM_NAME, llvm::sys::StrError()); } - break; + } + } #endif - case 'a': // attach {pid|process_name} - if (optarg && optarg[0]) - attach_target = optarg; - break; - - case 'h': /* fall-through is intentional */ - case '?': - show_usage = true; - break; + log_file = Args.getLastArgValue(OPT_log_file).str(); + log_channels = Args.getLastArgValue(OPT_log_channels); + named_pipe_path = Args.getLastArgValue(OPT_named_pipe).str(); + reverse_connect = Args.hasArg(OPT_reverse_connect); + attach_target = Args.getLastArgValue(OPT_attach).str(); + if (Args.hasArg(OPT_pipe)) { + if (!llvm::to_integer(Args.getLastArgValue(OPT_pipe), unnamed_pipe)) { + WithColor::error() << "invalid '--pipe' argument\n" << HelpText; + return 1; } } - - if (show_usage || option_error) { - display_usage(progname, subcommand); - exit(option_error); + if (Args.hasArg(OPT_fd)) { + if (!llvm::to_integer(Args.getLastArgValue(OPT_fd), connection_fd)) { + WithColor::error() << "invalid '--fd' argument\n" << HelpText; + return 1; + } } if (!LLDBServerUtilities::SetupLogging( @@ -486,30 +469,26 @@ int main_gdbserver(int argc, char *argv[]) { LLDB_LOG_OPTION_PREPEND_FILE_FUNCTION)) return -1; - Log *log(lldb_private::GetLogIfAnyCategoriesSet(GDBR_LOG_PROCESS)); - if (log) { - LLDB_LOGF(log, "lldb-server launch"); - for (int i = 0; i < argc; i++) { - LLDB_LOGF(log, "argv[%i] = '%s'", i, argv[i]); - } + std::vector Inputs; + for (opt::Arg *Arg : Args.filtered(OPT_INPUT)) + Inputs.push_back(Arg->getValue()); + if (opt::Arg *Arg = Args.getLastArg(OPT_REM)) { + for (const char *Val : Arg->getValues()) + Inputs.push_back(Val); } - - // Skip any options we consumed with getopt_long_only. 
- argc -= optind; - argv += optind; - - if (argc == 0 && connection_fd == -1) { - fputs("No arguments\n", stderr); - display_usage(progname, subcommand); - exit(255); + if (Inputs.empty() && connection_fd == -1) { + WithColor::error() << "no connection arguments\n" << HelpText; + return 1; } NativeProcessFactory factory; GDBRemoteCommunicationServerLLGS gdb_server(mainloop, factory); - const char *const host_and_port = argv[0]; - argc -= 1; - argv += 1; + llvm::StringRef host_and_port; + if (!Inputs.empty()) { + host_and_port = Inputs.front(); + Inputs.erase(Inputs.begin()); + } // Any arguments left over are for the program that we need to launch. If // there @@ -520,8 +499,8 @@ int main_gdbserver(int argc, char *argv[]) { // explicitly asked to attach with the --attach={pid|program_name} form. if (!attach_target.empty()) handle_attach(gdb_server, attach_target); - else if (argc > 0) - handle_launch(gdb_server, argc, argv); + else if (!Inputs.empty()) + handle_launch(gdb_server, Inputs); // Print version info. 
printf("%s-%s\n", LLGS_PROGRAM_NAME, LLGS_VERSION_STR); @@ -532,7 +511,6 @@ int main_gdbserver(int argc, char *argv[]) { if (!gdb_server.IsConnected()) { fprintf(stderr, "no connection information provided, unable to run\n"); - display_usage(progname, subcommand); return 1; } From 26459e6d8eeb3c2e61c70d207de7e10a00b4ed1c Mon Sep 17 00:00:00 2001 From: Christopher Tetreault Date: Wed, 21 Oct 2020 07:23:54 -0700 Subject: [PATCH 051/179] Fix "Unknown arguments specified" to if in lldb Reviewed By: labath Differential Revision: https://reviews.llvm.org/D89807 --- lldb/cmake/modules/FindPythonAndSwig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/cmake/modules/FindPythonAndSwig.cmake b/lldb/cmake/modules/FindPythonAndSwig.cmake index de274ede5dbf6..dcbc386b70ddc 100644 --- a/lldb/cmake/modules/FindPythonAndSwig.cmake +++ b/lldb/cmake/modules/FindPythonAndSwig.cmake @@ -48,7 +48,7 @@ else() get_property(MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) if ("${Python3_VERSION}" VERSION_GREATER_EQUAL "3.7" AND "${SWIG_VERSION}" VERSION_LESS "4.0" AND WIN32 AND ( - ${MULTI_CONFIG} OR (${uppercase_CMAKE_BUILD_TYPE} STREQUAL "DEBUG"))) + ${MULTI_CONFIG} OR (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG"))) # Technically this can happen with non-Windows builds too, but we are not # able to detect whether Python was built with assertions, and only Windows # has the requirement that Debug LLDB must use Debug Python. From 5d796645d6c8cadeb003715c33e231a8ba05b6de Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 26 Jun 2020 12:08:59 -0400 Subject: [PATCH 052/179] [take 2] [libc++] Include <__config_site> from <__config> Prior to this patch, we would generate a fancy <__config> header by concatenating <__config_site> and <__config>. This complexifies the build system and also increases the difference between what's tested and what's actually installed. 
This patch removes that complexity and instead simply installs <__config_site> alongside the libc++ headers. <__config_site> is then included by <__config>, which is much simpler. Doing this also opens the door to having different <__config_site> headers depending on the target, which was impossible before. It does change the workflow for testing header-only changes to libc++. Previously, we would run `lit` against the headers in libcxx/include. After this patch, we run it against a fake installation root of the headers (containing a proper <__config_site> header). This makes use closer to testing what we actually install, which is good, however it does mean that we have to update that root before testing header changes. Thus, we now need to run `ninja check-cxx-deps` before running `lit` by hand. This commit was originally applied in 1e46d1aa3 and reverted in eb60c487 because it broke the libc++abi and libunwind test suites. This has now been fixed. Differential Revision: https://reviews.llvm.org/D89041 --- libcxx/CMakeLists.txt | 1 + libcxx/docs/TestingLibcxx.rst | 10 ++++- libcxx/include/CMakeLists.txt | 56 +++++-------------------- libcxx/include/__config | 2 + libcxx/test/configs/legacy.cfg.in | 1 + libcxx/utils/libcxx/test/config.py | 19 +-------- libcxxabi/CMakeLists.txt | 4 -- libcxxabi/src/CMakeLists.txt | 6 +-- libcxxabi/test/libcxxabi/test/config.py | 3 +- libcxxabi/test/lit.site.cfg.in | 1 - libunwind/test/libunwind/test/config.py | 2 - 11 files changed, 28 insertions(+), 77 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 015a359bfb487..bdfecf6c0c607 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -31,6 +31,7 @@ set(CMAKE_MODULE_PATH set(LIBCXX_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LIBCXX_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(LIBCXX_BINARY_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++build") +set(LIBCXX_GENERATED_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++/v1") if (CMAKE_SOURCE_DIR STREQUAL 
CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUILD) project(libcxx CXX C) diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst index d42becfd5c3d8..ec017e23b147a 100644 --- a/libcxx/docs/TestingLibcxx.rst +++ b/libcxx/docs/TestingLibcxx.rst @@ -26,8 +26,8 @@ Usage After building libc++, you can run parts of the libc++ test suite by simply running ``llvm-lit`` on a specified test or directory. If you're unsure -whether the required libraries have been built, you can use the -`check-cxx-deps` target. For example: +whether the required targets have been built, you can use the `check-cxx-deps` +target to build them. For example: .. code-block:: bash @@ -37,6 +37,12 @@ whether the required libraries have been built, you can use the $ /bin/llvm-lit -sv libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp # Run a single test $ /bin/llvm-lit -sv libcxx/test/std/atomics libcxx/test/std/threads # Test std::thread and std::atomic +In the default configuration, the tests are built against headers that form a +fake installation root of libc++. This installation root has to be updated when +changes are made to the headers, so you should re-run the `check-cxx-deps` target +before running the tests manually with `lit` when you make any sort of change, +including to the headers. + Sometimes you'll want to change the way LIT is running the tests. Custom options can be specified using the `--param==` flag. The most common option you'll want to change is the standard dialect (ie -std=c++XX). 
By default the diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 7c97db41bb73a..a8d6f74ea38f0 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -2,6 +2,7 @@ set(files __bit_reference __bsd_locale_defaults.h __bsd_locale_fallbacks.h + __config __errc __debug __functional_03 @@ -184,62 +185,28 @@ if(LIBCXX_INSTALL_SUPPORT_HEADERS) ) endif() -configure_file("__config_site.in" - "${LIBCXX_BINARY_DIR}/__config_site" - @ONLY) - -# Generate a custom __config header. The new header is created -# by prepending __config_site to the current __config header. -add_custom_command(OUTPUT ${LIBCXX_BINARY_DIR}/__generated_config - COMMAND ${Python3_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/cat_files.py - ${LIBCXX_BINARY_DIR}/__config_site - ${LIBCXX_SOURCE_DIR}/include/__config - -o ${LIBCXX_BINARY_DIR}/__generated_config - DEPENDS ${LIBCXX_SOURCE_DIR}/include/__config - ${LIBCXX_BINARY_DIR}/__config_site -) -# Add a target that executes the generation commands. -add_custom_target(cxx-generated-config ALL - DEPENDS ${LIBCXX_BINARY_DIR}/__generated_config) - if(LIBCXX_HEADER_DIR) - set(output_dir ${LIBCXX_HEADER_DIR}/include/c++/v1) + configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site" @ONLY) - set(out_files) + set(_all_includes "${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site") foreach(f ${files}) - set(src ${CMAKE_CURRENT_SOURCE_DIR}/${f}) - set(dst ${output_dir}/${f}) + set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}") + set(dst "${LIBCXX_GENERATED_INCLUDE_DIR}/${f}") add_custom_command(OUTPUT ${dst} DEPENDS ${src} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} COMMENT "Copying CXX header ${f}") - list(APPEND out_files ${dst}) + list(APPEND _all_includes "${dst}") endforeach() - - # Copy the generated header as __config into build directory. 
- set(src ${LIBCXX_BINARY_DIR}/__generated_config) - set(dst ${output_dir}/__config) - add_custom_command(OUTPUT ${dst} - DEPENDS ${src} cxx-generated-config - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} - COMMENT "Copying CXX __config") - list(APPEND out_files ${dst}) - add_custom_target(generate-cxx-headers DEPENDS ${out_files}) + add_custom_target(generate-cxx-headers DEPENDS ${_all_includes}) add_library(cxx-headers INTERFACE) add_dependencies(cxx-headers generate-cxx-headers ${LIBCXX_CXX_ABI_HEADER_TARGET}) # TODO: Use target_include_directories once we figure out why that breaks the runtimes build if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(cxx-headers INTERFACE /I "${output_dir}") - else() - target_compile_options(cxx-headers INTERFACE -I "${output_dir}") - endif() - - # Make sure the generated __config_site header is included when we build the library. - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(cxx-headers INTERFACE /FI "${LIBCXX_BINARY_DIR}/__config_site") + target_compile_options(cxx-headers INTERFACE /I "${LIBCXX_GENERATED_INCLUDE_DIR}") else() - target_compile_options(cxx-headers INTERFACE -include "${LIBCXX_BINARY_DIR}/__config_site") + target_compile_options(cxx-headers INTERFACE -I "${LIBCXX_GENERATED_INCLUDE_DIR}") endif() else() add_library(cxx-headers INTERFACE) @@ -255,11 +222,10 @@ if (LIBCXX_INSTALL_HEADERS) ) endforeach() - # Install the generated header as __config. - install(FILES ${LIBCXX_BINARY_DIR}/__generated_config + # Install the generated __config_site. 
+ install(FILES ${LIBCXX_BINARY_DIR}/__config_site DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1 PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ - RENAME __config COMPONENT cxx-headers) if (NOT CMAKE_CONFIGURATION_TYPES) diff --git a/libcxx/include/__config b/libcxx/include/__config index 8d2ab2e51b115..d99e972004b86 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -10,6 +10,8 @@ #ifndef _LIBCPP_CONFIG #define _LIBCPP_CONFIG +#include <__config_site> + #if defined(_MSC_VER) && !defined(__clang__) # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index f0a4e8a73e09a..148ac3fc5a899 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -3,6 +3,7 @@ import os import site +config.cxx_headers = "@LIBCXX_GENERATED_INCLUDE_DIR@" config.cxx_under_test = "@CMAKE_CXX_COMPILER@" config.project_obj_root = "@CMAKE_BINARY_DIR@" config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 0d21aa17afd27..c5050d1b6b9a7 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -331,7 +331,6 @@ def configure_default_compile_flags(self): def configure_compile_flags_header_includes(self): support_path = os.path.join(self.libcxx_src_root, 'test', 'support') - self.configure_config_site_header() if self.cxx_stdlib_under_test != 'libstdc++' and \ not self.target_info.is_windows(): self.cxx.compile_flags += [ @@ -348,16 +347,12 @@ def configure_compile_flags_header_includes(self): 'set_windows_crt_report_mode.h') ] cxx_headers = self.get_lit_conf('cxx_headers') - if cxx_headers == '' or (cxx_headers is None - and self.cxx_stdlib_under_test != 'libc++'): + if cxx_headers is None and self.cxx_stdlib_under_test != 'libc++': self.lit_config.note('using the system cxx headers') 
return self.cxx.compile_flags += ['-nostdinc++'] - if cxx_headers is None: - cxx_headers = os.path.join(self.libcxx_src_root, 'include') if not os.path.isdir(cxx_headers): - self.lit_config.fatal("cxx_headers='%s' is not a directory." - % cxx_headers) + self.lit_config.fatal("cxx_headers='{}' is not a directory.".format(cxx_headers)) self.cxx.compile_flags += ['-I' + cxx_headers] if self.libcxx_obj_root is not None: cxxabi_headers = os.path.join(self.libcxx_obj_root, 'include', @@ -365,16 +360,6 @@ def configure_compile_flags_header_includes(self): if os.path.isdir(cxxabi_headers): self.cxx.compile_flags += ['-I' + cxxabi_headers] - def configure_config_site_header(self): - # Check for a possible __config_site in the build directory. We - # use this if it exists. - if self.libcxx_obj_root is None: - return - config_site_header = os.path.join(self.libcxx_obj_root, '__config_site') - if not os.path.isfile(config_site_header): - return - self.cxx.compile_flags += ['-include', config_site_header] - def configure_link_flags(self): # Configure library path self.configure_link_flags_cxx_library_path() diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 3f37dbf4cd36f..f0593bc2a076b 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -138,10 +138,6 @@ if (NOT LIBCXXABI_ENABLE_SHARED AND NOT LIBCXXABI_ENABLE_STATIC) message(FATAL_ERROR "libc++abi must be built as either a shared or static library.") endif() -set(LIBCXXABI_LIBCXX_INCLUDES "${LIBCXXABI_LIBCXX_PATH}/include" CACHE PATH - "Specify path to libc++ includes.") -message(STATUS "Libc++abi will be using libc++ includes from ${LIBCXXABI_LIBCXX_INCLUDES}") - option(LIBCXXABI_HERMETIC_STATIC_LIBRARY "Do not export any symbols from the static library." 
OFF) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index e9e454082a054..8d4d84c225e23 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -55,8 +55,6 @@ if (MSVC_IDE OR XCODE) endif() endif() -include_directories("${LIBCXXABI_LIBCXX_INCLUDES}") - if (LIBCXXABI_HAS_CXA_THREAD_ATEXIT_IMPL) add_definitions(-DHAVE___CXA_THREAD_ATEXIT_IMPL) endif() @@ -168,7 +166,7 @@ if (LIBCXXABI_ENABLE_SHARED) if(COMMAND llvm_setup_rpath) llvm_setup_rpath(cxxabi_shared) endif() - target_link_libraries(cxxabi_shared PRIVATE ${LIBCXXABI_SHARED_LIBRARIES} ${LIBCXXABI_LIBRARIES}) + target_link_libraries(cxxabi_shared PRIVATE cxx-headers ${LIBCXXABI_SHARED_LIBRARIES} ${LIBCXXABI_LIBRARIES}) if (TARGET pstl::ParallelSTL) target_link_libraries(cxxabi_shared PUBLIC pstl::ParallelSTL) endif() @@ -233,7 +231,7 @@ endif() # Build the static library. if (LIBCXXABI_ENABLE_STATIC) add_library(cxxabi_static STATIC ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) - target_link_libraries(cxxabi_static PRIVATE ${LIBCXXABI_STATIC_LIBRARIES} ${LIBCXXABI_LIBRARIES}) + target_link_libraries(cxxabi_static PRIVATE cxx-headers ${LIBCXXABI_STATIC_LIBRARIES} ${LIBCXXABI_LIBRARIES}) if (TARGET pstl::ParallelSTL) target_link_libraries(cxxabi_static PUBLIC pstl::ParallelSTL) endif() diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index 45fb0f5d7afcb..4d44d88ccc368 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ b/libcxxabi/test/libcxxabi/test/config.py @@ -56,10 +56,9 @@ def configure_compile_flags(self): super(Configuration, self).configure_compile_flags() def configure_compile_flags_header_includes(self): - self.configure_config_site_header() cxx_headers = self.get_lit_conf( 'cxx_headers', - os.path.join(self.libcxx_src_root, '/include')) + os.path.join(self.libcxx_obj_root, 'include', 'c++', 'v1')) if cxx_headers == '': self.lit_config.note('using the systems c++ headers') else: diff --git 
a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index 87f955e321610..a6d43c6e2da19 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -9,7 +9,6 @@ config.libcxxabi_src_root = "@LIBCXXABI_SOURCE_DIR@" config.libcxxabi_obj_root = "@LIBCXXABI_BINARY_DIR@" config.abi_library_path = "@LIBCXXABI_LIBRARY_DIR@" config.libcxx_src_root = "@LIBCXXABI_LIBCXX_PATH@" -config.cxx_headers = "@LIBCXXABI_LIBCXX_INCLUDES@" config.libunwind_headers = "@LIBCXXABI_LIBUNWIND_INCLUDES_INTERNAL@" config.cxx_library_root = "@LIBCXXABI_LIBCXX_LIBRARY_PATH@" config.llvm_unwinder = @LIBCXXABI_USE_LLVM_UNWINDER@ diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py index 977f9a0fb3f93..183c48f6a8016 100644 --- a/libunwind/test/libunwind/test/config.py +++ b/libunwind/test/libunwind/test/config.py @@ -49,8 +49,6 @@ def configure_compile_flags(self): super(Configuration, self).configure_compile_flags() def configure_compile_flags_header_includes(self): - self.configure_config_site_header() - libunwind_headers = self.get_lit_conf( 'libunwind_headers', os.path.join(self.libunwind_src_root, 'include')) From d900b755ed003967d1c9675b62293414831db1b6 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 21 Oct 2020 17:45:51 +0200 Subject: [PATCH 053/179] [lldb] Fix windows build for fa5fa63fd140f --- lldb/tools/lldb-server/lldb-gdbserver.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp index 0fbb13800bf74..9b8c67af14db2 100644 --- a/lldb/tools/lldb-server/lldb-gdbserver.cpp +++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp @@ -451,10 +451,12 @@ int main_gdbserver(int argc, char *argv[]) { reverse_connect = Args.hasArg(OPT_reverse_connect); attach_target = Args.getLastArgValue(OPT_attach).str(); if (Args.hasArg(OPT_pipe)) { - if (!llvm::to_integer(Args.getLastArgValue(OPT_pipe), unnamed_pipe)) { + uint64_t 
Arg; + if (!llvm::to_integer(Args.getLastArgValue(OPT_pipe), Arg)) { WithColor::error() << "invalid '--pipe' argument\n" << HelpText; return 1; } + unnamed_pipe = (pipe_t)Arg; } if (Args.hasArg(OPT_fd)) { if (!llvm::to_integer(Args.getLastArgValue(OPT_fd), connection_fd)) { From 55dc123555dbb3e7ca717c1ecc0ab6cd934bdee5 Mon Sep 17 00:00:00 2001 From: JonChesterfield Date: Wed, 21 Oct 2020 16:59:16 +0100 Subject: [PATCH 054/179] [libomptarget][amdgcn] Refactor memcpy to eliminate maps [libomptarget][amdgcn] Refactor memcpy to eliminate maps Builds on D89776 to remove now dead code. Reviewed By: pdhaliwal Differential Revision: https://reviews.llvm.org/D89888 --- .../libomptarget/plugins/amdgpu/impl/atmi.cpp | 99 +++++++++++-- .../plugins/amdgpu/impl/atmi_runtime.h | 10 +- .../libomptarget/plugins/amdgpu/impl/data.cpp | 131 +----------------- .../libomptarget/plugins/amdgpu/impl/data.h | 83 ----------- .../libomptarget/plugins/amdgpu/src/rtl.cpp | 30 ++-- 5 files changed, 112 insertions(+), 241 deletions(-) delete mode 100644 openmp/libomptarget/plugins/amdgpu/impl/data.h diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp index 0586cd358c25c..285dc2dbe7639 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp @@ -3,7 +3,13 @@ * * This file is distributed under the MIT License. See LICENSE.txt for details. 
*===------------------------------------------------------------------------*/ +#include "atmi_runtime.h" +#include "internal.h" #include "rt.h" +#include +#include +#include + /* * Initialize/Finalize */ @@ -33,9 +39,44 @@ atmi_status_t atmi_module_register_from_memory_to_place( /* * Data */ -atmi_status_t atmi_memcpy(hsa_signal_t sig, void *dest, const void *src, - size_t size) { - hsa_status_t rc = hsa_memory_copy(dest, src, size); + +static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest, + const void *src, size_t size, + hsa_agent_t agent) { + const hsa_signal_value_t init = 1; + const hsa_signal_value_t success = 0; + hsa_signal_store_screlease(sig, init); + + hsa_status_t err = + hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig); + if (err != HSA_STATUS_SUCCESS) { + return err; + } + + // async_copy reports success by decrementing and failure by setting to < 0 + hsa_signal_value_t got = init; + while (got == init) { + got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init, + UINT64_MAX, ATMI_WAIT_STATE); + } + + if (got != success) { + return HSA_STATUS_ERROR; + } + + return err; +} + +struct atmiFreePtrDeletor { + void operator()(void *p) { + atmi_free(p); // ignore failure to free + } +}; + +atmi_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, + const void *hostSrc, size_t size, + hsa_agent_t agent) { + hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size); // hsa_memory_copy sometimes fails in situations where // allocate + copy succeeds. 
Looks like it might be related to @@ -44,17 +85,53 @@ atmi_status_t atmi_memcpy(hsa_signal_t sig, void *dest, const void *src, return ATMI_STATUS_SUCCESS; } - return core::Runtime::Memcpy(sig, dest, src, size); -} + void *tempHostPtr; + atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); + atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); + if (ret != ATMI_STATUS_SUCCESS) { + DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n", + size); + return ret; + } + std::unique_ptr del(tempHostPtr); + memcpy(tempHostPtr, hostSrc, size); -atmi_status_t atmi_memcpy_h2d(hsa_signal_t sig, void *device_dest, - const void *host_src, size_t size) { - return atmi_memcpy(sig, device_dest, host_src, size); + if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) != + HSA_STATUS_SUCCESS) { + return ATMI_STATUS_ERROR; + } + return ATMI_STATUS_SUCCESS; } -atmi_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *host_dest, - const void *device_src, size_t size) { - return atmi_memcpy(sig, host_dest, device_src, size); +atmi_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest, + const void *deviceSrc, size_t size, + hsa_agent_t agent) { + hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size); + + // hsa_memory_copy sometimes fails in situations where + // allocate + copy succeeds. Looks like it might be related to + // locking part of a read only segment. Fall back for now. 
+ if (rc == HSA_STATUS_SUCCESS) { + return ATMI_STATUS_SUCCESS; + } + + void *tempHostPtr; + atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); + atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); + if (ret != ATMI_STATUS_SUCCESS) { + DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n", + size); + return ret; + } + std::unique_ptr del(tempHostPtr); + + if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) != + HSA_STATUS_SUCCESS) { + return ATMI_STATUS_ERROR; + } + + memcpy(dest, tempHostPtr, size); + return ATMI_STATUS_SUCCESS; } atmi_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h index a935b6ad4b759..47022f7f5dea3 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h @@ -155,11 +155,13 @@ atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place); */ atmi_status_t atmi_free(void *ptr); -atmi_status_t atmi_memcpy_h2d(hsa_signal_t sig, void *device_dest, - const void *host_src, size_t size); +atmi_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, + const void *hostSrc, size_t size, + hsa_agent_t agent); -atmi_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *host_dest, - const void *device_src, size_t size); +atmi_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *hostDest, + const void *deviceSrc, size_t size, + hsa_agent_t agent); /** @} */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp index 91627bd8d4b31..39546fbae4b3b 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp @@ -3,7 +3,6 @@ * * This file is distributed under the MIT License. See LICENSE.txt for details. 
*===------------------------------------------------------------------------*/ -#include "data.h" #include "atmi_runtime.h" #include "internal.h" #include "machine.h" @@ -21,7 +20,6 @@ using core::TaskImpl; extern ATLMachine g_atl_machine; namespace core { -ATLPointerTracker g_data_map; // Track all am pointer allocations. void allow_access_to_all_gpu_agents(void *ptr); const char *getPlaceStr(atmi_devtype_t type) { @@ -35,37 +33,6 @@ const char *getPlaceStr(atmi_devtype_t type) { } } -std::ostream &operator<<(std::ostream &os, const ATLData *ap) { - atmi_mem_place_t place = ap->place(); - os << " devicePointer:" << ap->ptr() << " sizeBytes:" << ap->size() - << " place:(" << getPlaceStr(place.dev_type) << ", " << place.dev_id - << ", " << place.mem_id << ")"; - return os; -} - -void ATLPointerTracker::insert(void *pointer, ATLData *p) { - std::lock_guard l(mutex_); - - DEBUG_PRINT("insert: %p + %zu\n", pointer, p->size()); - tracker_.insert(std::make_pair(ATLMemoryRange(pointer, p->size()), p)); -} - -void ATLPointerTracker::remove(void *pointer) { - std::lock_guard l(mutex_); - DEBUG_PRINT("remove: %p\n", pointer); - tracker_.erase(ATLMemoryRange(pointer, 1)); -} - -ATLData *ATLPointerTracker::find(const void *pointer) { - std::lock_guard l(mutex_); - ATLData *ret = NULL; - auto iter = tracker_.find(ATLMemoryRange(pointer, 1)); - DEBUG_PRINT("find: %p\n", pointer); - if (iter != tracker_.end()) // found - ret = iter->second; - return ret; -} - ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) { int dev_id = place.dev_id; switch (place.dev_type) { @@ -76,18 +43,12 @@ ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) { } } -static hsa_agent_t get_mem_agent(atmi_mem_place_t place) { - return get_processor_by_mem_place(place).agent(); -} - hsa_amd_memory_pool_t get_memory_pool_by_mem_place(atmi_mem_place_t place) { ATLProcessor &proc = get_processor_by_mem_place(place); return get_memory_pool(proc, place.mem_id); } void 
register_allocation(void *ptr, size_t size, atmi_mem_place_t place) { - ATLData *data = new ATLData(ptr, size, place); - g_data_map.insert(ptr, data); if (place.dev_type == ATMI_DEVTYPE_CPU) allow_access_to_all_gpu_agents(ptr); // TODO(ashwinma): what if one GPU wants to access another GPU? @@ -112,103 +73,13 @@ atmi_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) { atmi_status_t Runtime::Memfree(void *ptr) { atmi_status_t ret = ATMI_STATUS_SUCCESS; hsa_status_t err; - ATLData *data = g_data_map.find(ptr); - if (!data) - ErrorCheck(Checking pointer info userData, - HSA_STATUS_ERROR_INVALID_ALLOCATION); - - g_data_map.remove(ptr); - delete data; - err = hsa_amd_memory_pool_free(ptr); ErrorCheck(atmi_free, err); DEBUG_PRINT("Freed %p\n", ptr); - if (err != HSA_STATUS_SUCCESS || !data) + if (err != HSA_STATUS_SUCCESS) ret = ATMI_STATUS_ERROR; return ret; } -static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest, - const void *src, size_t size, - hsa_agent_t agent) { - const hsa_signal_value_t init = 1; - const hsa_signal_value_t success = 0; - hsa_signal_store_screlease(sig, init); - - hsa_status_t err = - hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig); - if (err != HSA_STATUS_SUCCESS) { - return err; - } - - // async_copy reports success by decrementing and failure by setting to < 0 - hsa_signal_value_t got = init; - while (got == init) { - got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init, - UINT64_MAX, ATMI_WAIT_STATE); - } - - if (got != success) { - return HSA_STATUS_ERROR; - } - - return err; -} - -struct atmiFreePtrDeletor { - void operator()(void *p) { - atmi_free(p); // ignore failure to free - } -}; - -atmi_status_t Runtime::Memcpy(hsa_signal_t sig, void *dest, const void *src, - size_t size) { - ATLData *src_data = g_data_map.find(src); - ATLData *dest_data = g_data_map.find(dest); - atmi_mem_place_t cpu = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); - - void *temp_host_ptr; - atmi_status_t 
ret = atmi_malloc(&temp_host_ptr, size, cpu); - if (ret != ATMI_STATUS_SUCCESS) { - return ret; - } - std::unique_ptr del(temp_host_ptr); - - if (src_data && !dest_data) { - // Copy from device to scratch to host - hsa_agent_t agent = get_mem_agent(src_data->place()); - DEBUG_PRINT("Memcpy D2H device agent: %lu\n", agent.handle); - - if (invoke_hsa_copy(sig, temp_host_ptr, src, size, agent) != - HSA_STATUS_SUCCESS) { - return ATMI_STATUS_ERROR; - } - - memcpy(dest, temp_host_ptr, size); - - } else if (!src_data && dest_data) { - // Copy from host to scratch to device - hsa_agent_t agent = get_mem_agent(dest_data->place()); - DEBUG_PRINT("Memcpy H2D device agent: %lu\n", agent.handle); - - memcpy(temp_host_ptr, src, size); - - if (invoke_hsa_copy(sig, dest, temp_host_ptr, size, agent) != - HSA_STATUS_SUCCESS) { - return ATMI_STATUS_ERROR; - } - - } else if (!src_data && !dest_data) { - // would be host to host, just call memcpy, or missing metadata - DEBUG_PRINT("atmi_memcpy invoked without metadata\n"); - return ATMI_STATUS_ERROR; - } else { - DEBUG_PRINT("atmi_memcpy unimplemented device to device copy\n"); - return ATMI_STATUS_ERROR; - } - - return ATMI_STATUS_SUCCESS; -} - } // namespace core diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.h b/openmp/libomptarget/plugins/amdgpu/impl/data.h deleted file mode 100644 index fa9e7380bf673..0000000000000 --- a/openmp/libomptarget/plugins/amdgpu/impl/data.h +++ /dev/null @@ -1,83 +0,0 @@ -/*===-------------------------------------------------------------------------- - * ATMI (Asynchronous Task and Memory Interface) - * - * This file is distributed under the MIT License. See LICENSE.txt for details. 
- *===------------------------------------------------------------------------*/ -#ifndef SRC_RUNTIME_INCLUDE_DATA_H_ -#define SRC_RUNTIME_INCLUDE_DATA_H_ -#include "atmi.h" -#include -#include -#include -#include -#include -// we maintain our own mapping of device addr to a user specified data object -// in order to work around a (possibly historic) bug in ROCr's -// hsa_amd_pointer_info_set_userdata for variable symbols -// this is expected to be temporary - -namespace core { -// Internal representation of any data that is created and managed by ATMI. -// Data can be located on any device memory or host memory. -class ATLData { -public: - ATLData(void *ptr, size_t size, atmi_mem_place_t place) - : ptr_(ptr), size_(size), place_(place) {} - - void *ptr() const { return ptr_; } - size_t size() const { return size_; } - atmi_mem_place_t place() const { return place_; } - -private: - void *ptr_; - size_t size_; - atmi_mem_place_t place_; -}; - -//--- -struct ATLMemoryRange { - const void *base_pointer; - const void *end_pointer; - ATLMemoryRange(const void *bp, size_t size_bytes) - : base_pointer(bp), - end_pointer(reinterpret_cast(bp) + size_bytes - - 1) {} -}; - -// Functor to compare ranges: -struct ATLMemoryRangeCompare { - // Return true is LHS range is less than RHS - used to order the ranges - bool operator()(const ATLMemoryRange &lhs, const ATLMemoryRange &rhs) const { - return lhs.end_pointer < rhs.base_pointer; - } -}; - -//------------------------------------------------------------------------------------------------- -// This structure tracks information for each pointer. -// Uses memory-range-based lookups - so pointers that exist anywhere in the -// range of hostPtr + size -// will find the associated ATLPointerInfo. -// The insertions and lookups use a self-balancing binary tree and should -// support O(logN) lookup speed. -// The structure is thread-safe - writers obtain a mutex before modifying the -// tree. 
Multiple simulatenous readers are supported. -class ATLPointerTracker { - typedef std::map - MapTrackerType; - -public: - void insert(void *pointer, ATLData *data); - void remove(void *pointer); - ATLData *find(const void *pointer); - -private: - MapTrackerType tracker_; - std::mutex mutex_; -}; - -extern ATLPointerTracker g_data_map; // Track all am pointer allocations. - -enum class Direction { ATMI_H2D, ATMI_D2H, ATMI_D2D, ATMI_H2H }; - -} // namespace core -#endif // SRC_RUNTIME_INCLUDE_DATA_H_ diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 8fd7b7e34155f..dc3a288903f03 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -335,27 +335,28 @@ class RTLDeviceInfoTy { static const int Default_WG_Size = llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size]; - using MemcpyFunc = atmi_status_t(hsa_signal_t, void *, const void *, - size_t size); + using MemcpyFunc = atmi_status_t (*)(hsa_signal_t, void *, const void *, + size_t size, hsa_agent_t); atmi_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size, - MemcpyFunc Func) { + MemcpyFunc Func, int32_t deviceId) { + hsa_agent_t agent = HSAAgents[deviceId]; hsa_signal_t s = FreeSignalPool.pop(); if (s.handle == 0) { return ATMI_STATUS_ERROR; } - atmi_status_t r = Func(s, dest, src, size); + atmi_status_t r = Func(s, dest, src, size, agent); FreeSignalPool.push(s); return r; } atmi_status_t freesignalpool_memcpy_d2h(void *dest, const void *src, - size_t size) { - return freesignalpool_memcpy(dest, src, size, atmi_memcpy_d2h); + size_t size, int32_t deviceId) { + return freesignalpool_memcpy(dest, src, size, atmi_memcpy_d2h, deviceId); } atmi_status_t freesignalpool_memcpy_h2d(void *dest, const void *src, - size_t size) { - return freesignalpool_memcpy(dest, src, size, atmi_memcpy_h2d); + size_t size, int32_t deviceId) { + return freesignalpool_memcpy(dest, 
src, size, atmi_memcpy_h2d, deviceId); } // Record entry point associated with device @@ -550,7 +551,8 @@ int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, (long long unsigned)(Elf64_Addr)TgtPtr, (long long unsigned)(Elf64_Addr)HstPtr); - err = DeviceInfo.freesignalpool_memcpy_d2h(HstPtr, TgtPtr, (size_t)Size); + err = DeviceInfo.freesignalpool_memcpy_d2h(HstPtr, TgtPtr, (size_t)Size, + DeviceId); if (err != ATMI_STATUS_SUCCESS) { DP("Error when copying data from device to host. Pointers: " @@ -576,7 +578,8 @@ int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, DP("Submit data %ld bytes, (hst:%016llx) -> (tgt:%016llx).\n", Size, (long long unsigned)(Elf64_Addr)HstPtr, (long long unsigned)(Elf64_Addr)TgtPtr); - err = DeviceInfo.freesignalpool_memcpy_h2d(TgtPtr, HstPtr, (size_t)Size); + err = DeviceInfo.freesignalpool_memcpy_h2d(TgtPtr, HstPtr, (size_t)Size, + DeviceId); if (err != ATMI_STATUS_SUCCESS) { DP("Error when copying data from host to device. Pointers: " "host = 0x%016lx, device = 0x%016lx, size = %lld\n", @@ -1033,7 +1036,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, } // write ptr to device memory so it can be used by later kernels - err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr, sizeof(void *)); + err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr, sizeof(void *), + device_id); if (err != ATMI_STATUS_SUCCESS) { fprintf(stderr, "memcpy install of state_ptr failed\n"); return NULL; @@ -1103,7 +1107,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, // can access host addresses directly. There is no longer a // need for device copies. 
err = DeviceInfo.freesignalpool_memcpy_h2d(varptr, e->addr, - sizeof(void *)); + sizeof(void *), device_id); if (err != ATMI_STATUS_SUCCESS) DP("Error when copying USM\n"); DP("Copy linked variable host address (" DPxMOD ")" @@ -1532,7 +1536,7 @@ static void *AllocateNestedParallelCallMemory(int MaxParLevel, int NumGroups, atmi_status_t err = atmi_malloc(&TgtPtr, NestedMemSize, get_gpu_mem_place(device_id)); err = DeviceInfo.freesignalpool_memcpy_h2d(CallStackAddr, &TgtPtr, - sizeof(void *)); + sizeof(void *), device_id); if (print_kernel_trace > 2) fprintf(stderr, "CallSck %lx TgtPtr %lx *TgtPtr %lx \n", (long)CallStackAddr, (long)&TgtPtr, (long)TgtPtr); From 53c43431bc6a01ad1e29b9351450ac18d5270ab3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Oct 2020 16:53:00 -0400 Subject: [PATCH 055/179] AMDGPU: Propagate amdgpu-flat-work-group-size attributes Fixes being overly conservative with the register counts in called functions. This should try to do a conservative range merge, but for now just clone. Also fix not being able to functionally run the pass standalone. 
--- .../AMDGPU/AMDGPUPropagateAttributes.cpp | 24 ++++++++-- ...opagate-attributes-flat-work-group-size.ll | 48 +++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp index 982aae3748849..dcbe4270e8a92 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -32,6 +32,7 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Target/TargetMachine.h" @@ -56,8 +57,10 @@ static constexpr const FeatureBitset TargetFeatures = { }; // Attributes to propagate. +// TODO: Support conservative min/max merging instead of cloning. static constexpr const char* AttributeNames[] = { - "amdgpu-waves-per-eu" + "amdgpu-waves-per-eu", + "amdgpu-flat-work-group-size" }; static constexpr unsigned NumAttr = @@ -371,15 +374,28 @@ AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const } bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { - if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) + if (!TM) { + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + TM = &TPC->getTM(); + } + + if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; return AMDGPUPropagateAttributes(TM, false).process(F); } bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { - if (!TM) - return false; + if (!TM) { + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + TM = &TPC->getTM(); + } return AMDGPUPropagateAttributes(TM, true).process(M); } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll 
b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll new file mode 100644 index 0000000000000..30c6eded2397a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll @@ -0,0 +1,48 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s + +; CHECK: define internal void @max_flat_1_1024() #0 { +define internal void @max_flat_1_1024() #0 { + ret void +} + +; CHECK: define internal void @max_flat_1_256() #1 { +define internal void @max_flat_1_256() #1 { + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_default() #1 { +define amdgpu_kernel void @kernel_1_256_call_default() #1 { + call void @default() + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_1_256() #1 { +define amdgpu_kernel void @kernel_1_256_call_1_256() #1 { + call void @max_flat_1_256() + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_64_64() #1 { +define amdgpu_kernel void @kernel_1_256_call_64_64() #1 { + call void @max_flat_64_64() + ret void +} + +; CHECK: define internal void @max_flat_64_64() #2 { +define internal void @max_flat_64_64() #2 { + ret void +} + +; CHECK: define internal void @default() #2 { +define internal void @default() #3 { + ret void +} + +attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" } +attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" } +attributes #2 = { noinline "amdgpu-flat-work-group-size"="64,64" } +attributes #3 = { noinline } + +; CHECK: attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" +; CHECK-NEXT: attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" +; CHECK-NEXT: attributes #2 = { noinline "amdgpu-flat-work-group-size"="1,256" From 1ed4caff1d5cd49233c1ae7b9f6483a946ed5eea Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Oct 2020 14:15:03 -0400 Subject: [PATCH 056/179] AMDGPU: Lower the threshold reported for maximum stack size exceeded Check 
the actual maximum supported stack size for a kernel. --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 2 +- .../CodeGen/AMDGPU/stack-size-overflow.ll | 45 ++++++++++++++++--- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 6238fb9664c65..c76d292acd6a8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -993,7 +993,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.FlatUsed = Info.UsesFlatScratch; ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion; - if (!isUInt<32>(ProgInfo.ScratchSize)) { + const uint64_t MaxScratchPerWorkitem = + GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize(); + if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) { DiagnosticInfoStackSize DiagStackSize(MF.getFunction(), ProgInfo.ScratchSize, DS_Error); MF.getFunction().getContext().diagnose(DiagStackSize); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 568180e20ebdb..17021fc06ca51 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -422,10 +422,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, SITargetLowering TLInfo; SIFrameLowering FrameLowering; +public: // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 
static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); -public: GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM); ~GCNSubtarget() override; diff --git a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll index ed7e2092e8867..8550b7b1ed484 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll @@ -3,12 +3,45 @@ declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #1 -; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit -; GCN: ; ScratchSize: 4294967296 -define amdgpu_kernel void @stack_size_limit() #0 { +; ERROR: error: stack size limit exceeded (131061) in stack_size_limit_wave64 +; GCN: ; ScratchSize: 131061 +define amdgpu_kernel void @stack_size_limit_wave64() #0 { entry: - %alloca = alloca [1073741823 x i32], align 4, addrspace(5) - %bc = bitcast [1073741823 x i32] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %bc, i8 9, i32 1073741823, i32 1, i1 true) + %alloca = alloca [131057 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [131057 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131057, i32 1, i1 true) ret void } + +; ERROR: error: stack size limit exceeded (262117) in stack_size_limit_wave32 +; GCN: ; ScratchSize: 262117 +define amdgpu_kernel void @stack_size_limit_wave32() #1 { +entry: + %alloca = alloca [262113 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [262113 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262113, i32 1, i1 true) + ret void +} + +; ERROR-NOT: error: +; GCN: ; ScratchSize: 131056 +define amdgpu_kernel void @max_stack_size_wave64() #0 { +entry: + %alloca = alloca [131052 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [131052 
x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131052, i32 1, i1 true) + ret void +} + +; ERROR-NOT: error: +; GCN: ; ScratchSize: 262112 +define amdgpu_kernel void @max_stack_size_wave32() #1 { +entry: + %alloca = alloca [262108 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [262108 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262108, i32 1, i1 true) + ret void +} + +attributes #0 = { "target-cpu" = "gfx900" } +attributes #1 = { "target-cpu" = "gfx1010" } From 4b7dafd9046f0ceaadacaafe0ea4a1fb00cf70a5 Mon Sep 17 00:00:00 2001 From: Frej Drejhammar Date: Wed, 21 Oct 2020 18:07:30 +0200 Subject: [PATCH 057/179] [mlir]: Clarify docs for external OpTrait::FunctionLike ops The documentation claims that an op with the trait FunctionLike has a single region containing the blocks that corresponding to the body of the function. It then goes on to say that the absence of a region corresponds to an external function when, in fact, this is represented by a single empty region. This patch changes the wording in the documentation to match the implementation. Signed-off-by: Frej Drejhammar Co-authored-by: Frej Drejhammar Co-authored-by: Klas Segeljakt Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D89868 --- mlir/docs/Traits.md | 2 +- mlir/include/mlir/IR/FunctionSupport.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/docs/Traits.md b/mlir/docs/Traits.md index 8b1cf0a03b99c..b8debb7d275fd 100644 --- a/mlir/docs/Traits.md +++ b/mlir/docs/Traits.md @@ -249,7 +249,7 @@ particular: - Ops must be symbols, i.e. 
also have the `Symbol` trait; - Ops have a single region with multiple blocks that corresponds to the body of the function; -- the absence of a region corresponds to an external function; +- An op with a single empty region corresponds to an external function; - arguments of the first block of the region are treated as function arguments; - they can have argument and result attributes that are stored in dictionary diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h index 3d467cd4f3642..7756761c2a523 100644 --- a/mlir/include/mlir/IR/FunctionSupport.h +++ b/mlir/include/mlir/IR/FunctionSupport.h @@ -79,7 +79,7 @@ namespace OpTrait { /// - Ops must be symbols, i.e. also have the `Symbol` trait; /// - Ops have a single region with multiple blocks that corresponds to the body /// of the function; -/// - the absence of a region corresponds to an external function; +/// - An op with a single empty region corresponds to an external function; /// - leading arguments of the first block of the region are treated as function /// arguments; /// - they can have argument attributes that are stored in a dictionary From 1c1803dbb0f6a4a9bef557d98b4f3a498eb1bf75 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 21 Oct 2020 11:46:32 +0200 Subject: [PATCH 058/179] [mlir][gpu] Add customer printer/parser for gpu.launch_func. 
Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D89262 --- mlir/include/mlir/Dialect/GPU/GPUOps.td | 42 +++++++-------- mlir/include/mlir/IR/FunctionImplementation.h | 10 ++++ mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 47 +++++++++++------ mlir/lib/IR/FunctionImplementation.cpp | 17 ++++--- mlir/test/Dialect/GPU/invalid.mlir | 51 ++++++++----------- mlir/test/Dialect/GPU/ops.mlir | 14 ++--- mlir/test/Dialect/GPU/outlining.mlir | 16 +++--- 7 files changed, 103 insertions(+), 94 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 1542241b5b021..180e9cf67d22b 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -18,10 +18,6 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" -// Type constraint accepting standard integers, indices. -def IntOrIndex : TypeConstraint< - Or<[AnySignlessInteger.predicate, Index.predicate]>, "integer or index">; - //===----------------------------------------------------------------------===// // GPU Dialect operations. //===----------------------------------------------------------------------===// @@ -296,9 +292,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, } def GPU_LaunchFuncOp : GPU_Op<"launch_func">, - Arguments<(ins IntOrIndex:$gridSizeX, IntOrIndex:$gridSizeY, - IntOrIndex:$gridSizeZ, IntOrIndex:$blockSizeX, - IntOrIndex:$blockSizeY, IntOrIndex:$blockSizeZ, + Arguments<(ins SymbolRefAttr:$kernel, + Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ, + Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ, Variadic:$operands)>, Results<(outs)> { let summary = "Launches a function as a GPU kernel"; @@ -312,8 +308,8 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, function is required to be a gpu.module. 
And finally, the module containing the kernel module (which thus cannot be the top-level module) is required to have the `gpu.container_module` attribute. The `gpu.launch_func` - operation has a symbol attribute named `kernel` to identify the fully specified - kernel function to launch (both the gpu.module and func). + operation has a symbol attribute named `kernel` to identify the fully + specified kernel function to launch (both the gpu.module and func). The operation takes at least six operands, with the first three operands being grid sizes along x,y,z dimensions and the following three being block @@ -321,8 +317,6 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, unused sizes must be explicitly set to `1`. The remaining operands are passed as arguments to the kernel function. - A custom syntax for this operation is currently not available. - Example: ```mlir @@ -357,13 +351,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, } } - "gpu.launch_func"(%cst, %cst, %cst, // Grid sizes. - %cst, %cst, %cst, // Block sizes. - %arg0, %arg1) // Arguments passed to the kernel. - { kernel_module = @kernels, // Module containing the kernel. - kernel = "kernel_1" } // Kernel function. - : (index, index, index, index, index, index, f32, memref) - -> () + gpu.launch_func + @kernels::@kernel_1 // Kernel function. + blocks in (%cst, %cst, %cst) // Grid size. + threads in (%cst, %cst, %cst) // Block size. + args(%arg0 : f32, %arg1 : memref) // Kernel arguments. } ``` }]; @@ -371,19 +363,12 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"GPUFuncOp kernelFunc, " - "Value gridSizeX, Value gridSizeY, Value gridSizeZ, " - "Value blockSizeX, Value blockSizeY, Value blockSizeZ, " - "ValueRange kernelOperands">, OpBuilder<"GPUFuncOp kernelFunc, " "KernelDim3 gridSize, KernelDim3 blockSize, " "ValueRange kernelOperands"> ]; let extraClassDeclaration = [{ - /// The kernel function specified by the operation's `kernel` attribute. 
- SymbolRefAttr kernel(); - /// The number of operands passed to the kernel function. unsigned getNumKernelOperands(); @@ -416,6 +401,13 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, }]; let verifier = [{ return ::verify(*this); }]; + let assemblyFormat = [{ + $kernel + `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)` + `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)` + custom($operands, type($operands)) + attr-dict + }]; } def GPU_LaunchOp : GPU_Op<"launch">, diff --git a/mlir/include/mlir/IR/FunctionImplementation.h b/mlir/include/mlir/IR/FunctionImplementation.h index 958cba51f6dcf..c19100c55219c 100644 --- a/mlir/include/mlir/IR/FunctionImplementation.h +++ b/mlir/include/mlir/IR/FunctionImplementation.h @@ -49,6 +49,16 @@ void addArgAndResultAttrs(Builder &builder, OperationState &result, using FuncTypeBuilder = function_ref, ArrayRef, VariadicFlag, std::string &)>; +/// Parses function arguments using `parser`. The `allowVariadic` argument +/// indicates whether functions with variadic arguments are supported. The +/// trailing arguments are populated by this function with names, types and +/// attributes of the arguments. +ParseResult parseFunctionArgumentList( + OpAsmParser &parser, bool allowAttributes, bool allowVariadic, + SmallVectorImpl &argNames, + SmallVectorImpl &argTypes, SmallVectorImpl &argAttrs, + bool &isVariadic); + /// Parses a function signature using `parser`. The `allowVariadic` argument /// indicates whether functions with variadic arguments are supported. 
The /// trailing arguments are populated by this function with names, types and diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 79fe969dbe175..7abefd7a54996 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -428,12 +428,11 @@ static ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) { //===----------------------------------------------------------------------===// void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, - GPUFuncOp kernelFunc, Value gridSizeX, Value gridSizeY, - Value gridSizeZ, Value blockSizeX, Value blockSizeY, - Value blockSizeZ, ValueRange kernelOperands) { + GPUFuncOp kernelFunc, KernelDim3 gridSize, + KernelDim3 blockSize, ValueRange kernelOperands) { // Add grid and block sizes as op operands, followed by the data operands. - result.addOperands( - {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ}); + result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x, + blockSize.y, blockSize.z}); result.addOperands(kernelOperands); auto kernelModule = kernelFunc.getParentOfType(); auto kernelSymbol = builder.getSymbolRefAttr( @@ -441,17 +440,6 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, result.addAttribute(getKernelAttrName(), kernelSymbol); } -void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, - GPUFuncOp kernelFunc, KernelDim3 gridSize, - KernelDim3 blockSize, ValueRange kernelOperands) { - build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z, - blockSize.x, blockSize.y, blockSize.z, kernelOperands); -} - -SymbolRefAttr LaunchFuncOp::kernel() { - return getAttrOfType(getKernelAttrName()); -} - unsigned LaunchFuncOp::getNumKernelOperands() { return getNumOperands() - kNumConfigOperands; } @@ -492,6 +480,33 @@ static LogicalResult verify(LaunchFuncOp op) { return success(); } +static ParseResult 
+parseLaunchFuncOperands(OpAsmParser &parser, + SmallVectorImpl &argNames, + SmallVectorImpl &argTypes) { + if (parser.parseOptionalKeyword("args")) + return success(); + SmallVector argAttrs; + bool isVariadic = false; + return impl::parseFunctionArgumentList(parser, /*allowAttributes=*/false, + /*allowVariadic=*/false, argNames, + argTypes, argAttrs, isVariadic); +} + +static void printLaunchFuncOperands(OpAsmPrinter &printer, + OperandRange operands, TypeRange types) { + if (operands.empty()) + return; + printer << "args("; + llvm::interleaveComma(llvm::zip(operands, types), printer, + [&](const auto &pair) { + printer.printOperand(std::get<0>(pair)); + printer << " : "; + printer.printType(std::get<1>(pair)); + }); + printer << ")"; +} + //===----------------------------------------------------------------------===// // GPUFuncOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/FunctionImplementation.cpp b/mlir/lib/IR/FunctionImplementation.cpp index 13aee344bbdcd..56b2221fd44f5 100644 --- a/mlir/lib/IR/FunctionImplementation.cpp +++ b/mlir/lib/IR/FunctionImplementation.cpp @@ -13,11 +13,11 @@ using namespace mlir; -static ParseResult -parseArgumentList(OpAsmParser &parser, bool allowVariadic, - SmallVectorImpl &argTypes, - SmallVectorImpl &argNames, - SmallVectorImpl &argAttrs, bool &isVariadic) { +ParseResult mlir::impl::parseFunctionArgumentList( + OpAsmParser &parser, bool allowAttributes, bool allowVariadic, + SmallVectorImpl &argNames, + SmallVectorImpl &argTypes, SmallVectorImpl &argAttrs, + bool &isVariadic) { if (parser.parseLParen()) return failure(); @@ -56,6 +56,8 @@ parseArgumentList(OpAsmParser &parser, bool allowVariadic, NamedAttrList attrs; if (parser.parseOptionalAttrDict(attrs)) return failure(); + if (!allowAttributes && !attrs.empty()) + return parser.emitError(loc, "expected arguments without attributes"); argAttrs.push_back(attrs); return success(); }; @@ -129,8 +131,9 @@ 
ParseResult mlir::impl::parseFunctionSignature( SmallVectorImpl &argTypes, SmallVectorImpl &argAttrs, bool &isVariadic, SmallVectorImpl &resultTypes, SmallVectorImpl &resultAttrs) { - if (parseArgumentList(parser, allowVariadic, argTypes, argNames, argAttrs, - isVariadic)) + bool allowArgAttrs = true; + if (parseFunctionArgumentList(parser, allowArgAttrs, allowVariadic, argNames, + argTypes, argAttrs, isVariadic)) return failure(); if (succeeded(parser.parseOptionalArrow())) return parseFunctionResultList(parser, resultTypes, resultAttrs); diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index cfdb06ac57023..3612b8e0dcc16 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -45,8 +45,7 @@ func @launch_func_too_few_operands(%sz : index) { func @launch_func_missing_parent_module_attribute(%sz : index) { // expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"} - : (index, index, index, index, index, index) -> () + gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) return } @@ -54,8 +53,8 @@ func @launch_func_missing_parent_module_attribute(%sz : index) { module attributes {gpu.container_module} { func @launch_func_missing_callee_attribute(%sz : index) { - // expected-error@+1 {{symbol reference attribute 'kernel' must be specified}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"} + // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}} + "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) : (index, index, index, index, index, index) -> () return } @@ -65,9 +64,8 @@ module attributes {gpu.container_module} { module attributes {gpu.container_module} { func @launch_func_no_function_attribute(%sz : index) { - // expected-error@+1 {{symbol reference attribute 'kernel' must be specified}} - "gpu.launch_func"(%sz, %sz, %sz, 
%sz, %sz, %sz) {kernel = 10} - : (index, index, index, index, index, index) -> () + // expected-error@+1 {{custom op 'gpu.launch_func' invalid kind of attribute specified}} + gpu.launch_func "foo" blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) return } } @@ -77,9 +75,7 @@ module attributes {gpu.container_module} { module attributes {gpu.container_module} { func @launch_func_undefined_module(%sz : index) { // expected-error@+1 {{kernel module 'kernels' is undefined}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) - { kernel = @kernels::@kernel_1 } - : (index, index, index, index, index, index) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) return } } @@ -103,9 +99,7 @@ module attributes {gpu.container_module} { func @launch_func_missing_module_attribute(%sz : index) { // expected-error@+1 {{kernel module 'kernels' is undefined}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) - { kernel = @kernels::@kernel_1 } - : (index, index, index, index, index, index) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) return } } @@ -117,9 +111,7 @@ module attributes {gpu.container_module} { func @launch_func_undefined_function(%sz : index) { // expected-error@+1 {{kernel function '@kernels::@kernel_1' is undefined}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) - { kernel = @kernels::@kernel_1 } - : (index, index, index, index, index, index) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) return } } @@ -135,9 +127,7 @@ module attributes {gpu.container_module} { func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr) { // expected-error@+1 {{kernel module 'kernels' is undefined}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg) - {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, !llvm.ptr) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, 
%sz, %sz) args(%arg : !llvm.ptr) return } } @@ -153,9 +143,7 @@ module attributes {gpu.container_module} { func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr) { // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg) - {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, !llvm.ptr) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr) return } } @@ -171,10 +159,7 @@ module attributes {gpu.container_module} { func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr) { // expected-error@+1 {{got 2 kernel operands but expected 1}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg) - {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, !llvm.ptr, - !llvm.ptr) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr, %arg : !llvm.ptr) return } } @@ -190,9 +175,17 @@ module attributes {gpu.container_module} { func @launch_func_kernel_operand_types(%sz : index, %arg : f32) { // expected-err@+1 {{type of function argument 0 does not match}} - "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg) - {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, f32) -> () + gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : f32) + return + } +} + +// ----- + +module attributes {gpu.container_module} { + func @launch_func_kernel_operand_attr(%sz : index) { + // expected-error@+1 {{expected arguments without attributes}} + gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%sz : index {foo}) return } } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index 23cd6d5c7d0a9..e81b233abfbce 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ 
b/mlir/test/Dialect/GPU/ops.mlir @@ -63,7 +63,7 @@ module attributes {gpu.container_module} { gpu.return } - gpu.func @kernel_2(%arg0: f32, %arg1: memref) kernel { + gpu.func @kernel_2() kernel { gpu.return } } @@ -74,15 +74,11 @@ module attributes {gpu.container_module} { // CHECK: %{{.*}} = constant 8 %cst = constant 8 : index - // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_1} : (index, index, index, index, index, index, f32, memref) -> () - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) - { kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, f32, memref) -> () + // CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args(%{{.*}} : f32, %{{.*}} : memref) + gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref) - // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_2} : (index, index, index, index, index, index, f32, memref) -> () - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) - { kernel = @kernels::@kernel_2} - : (index, index, index, index, index, index, f32, memref) -> () + // CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) + gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) return } diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index 5fd8b6ce79cd5..e2f16fe96a08a 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -21,7 +21,7 @@ func @launch() { // CHECK: %[[BDIMZ:.*]] = constant 28 %bDimZ = constant 28 : index - // CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], 
%[[ARG0]], %[[ARG1]]) {kernel = @launch_kernel::@launch_kernel} : (index, index, index, index, index, index, f32, memref) -> () + // CHECK: gpu.launch_func @launch_kernel::@launch_kernel blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref) // CHECK-NOT: gpu.launch blocks gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ) @@ -64,14 +64,14 @@ func @launch() { func @multiple_launches() { // CHECK: %[[CST:.*]] = constant 8 : index %cst = constant 8 : index - // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel::@multiple_launches_kernel} : (index, index, index, index, index, index) -> () + // CHECK: gpu.launch_func @multiple_launches_kernel::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) { gpu.terminator } - // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel_0::@multiple_launches_kernel} : (index, index, index, index, index, index) -> () + // CHECK: gpu.launch_func @multiple_launches_kernel_0::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst, %grid_z2 = %cst) threads(%tx2, %ty2, %tz2) in (%block_x2 = %cst, %block_y2 = %cst, @@ -95,7 +95,7 @@ func @extra_constants_not_inlined(%arg0: memref) { %cst2 = constant 2 : index %c0 = constant 0 : index %cst3 = "secret_constant"() : () -> index - // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}, %{{.*}}) {kernel = 
@extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel} : (index, index, index, index, index, index, memref, index) -> () + // CHECK: gpu.launch_func @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args({{.*}} : memref, {{.*}} : index) gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, @@ -119,7 +119,7 @@ func @extra_constants(%arg0: memref) { %cst2 = constant 2 : index %c0 = constant 0 : index %cst3 = dim %arg0, %c0 : memref - // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]]) {kernel = @extra_constants_kernel::@extra_constants_kernel} : (index, index, index, index, index, index, memref) -> () + // CHECK: gpu.launch_func @extra_constants_kernel::@extra_constants_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref) gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, @@ -130,7 +130,7 @@ func @extra_constants(%arg0: memref) { return } -// CHECK-LABEL: func @extra_constants_kernel +// CHECK-LABEL: func @extra_constants_kernel( // CHECK-SAME: %[[KARG0:.*]]: memref // CHECK: constant 2 // CHECK: constant 0 @@ -147,7 +147,7 @@ func @extra_constants_noarg(%arg0: memref, %arg1: memref) { %c0 = constant 0 : index // CHECK: dim %[[ARG1]] %cst3 = dim %arg1, %c0 : memref - // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]], %{{.*}}) {kernel = @extra_constants_noarg_kernel::@extra_constants_noarg_kernel} : (index, index, index, index, index, index, memref, index) -> () + // CHECK: gpu.launch_func @extra_constants_noarg_kernel::@extra_constants_noarg_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in 
(%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref, {{.*}} : index) gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, @@ -158,7 +158,7 @@ func @extra_constants_noarg(%arg0: memref, %arg1: memref) { return } -// CHECK-LABEL: func @extra_constants_noarg_kernel +// CHECK-LABEL: func @extra_constants_noarg_kernel( // CHECK-SAME: %[[KARG0:.*]]: memref, %[[KARG1:.*]]: index // CHECK: %[[KCST:.*]] = constant 2 // CHECK: "use"(%[[KCST]], %[[KARG0]], %[[KARG1]]) From 3ac561d8c348a7bdc0313a268d5b3b4dcac118a2 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 21 Oct 2020 08:24:53 +0200 Subject: [PATCH 059/179] [mlir][gpu] Add lowering to LLVM for `gpu.wait` and `gpu.wait async`. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D89686 --- mlir/include/mlir/Conversion/Passes.td | 1 + .../ConvertLaunchFuncToRuntimeCalls.cpp | 99 ++++++++++++++++++- .../lower-wait-to-gpu-runtime-calls.mlir | 21 ++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 36618384bb392..63a58fbc53f43 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -91,6 +91,7 @@ def ConvertAVX512ToLLVM : Pass<"convert-avx512-to-llvm", "ModuleOp"> { def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> { let summary = "Convert GPU dialect to LLVM dialect with GPU runtime calls"; let constructor = "mlir::createGpuToLLVMConversionPass()"; + let dependentDialects = ["LLVM::LLVMDialect"]; let options = [ Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string", "", "Annotation attribute string for GPU binary">, diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp 
b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index f7f5834e6351e..9d4c0c32dc82c 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -157,6 +157,34 @@ class ConvertHostRegisterOpToGpuRuntimeCallPattern ConversionPatternRewriter &rewriter) const override; }; +/// A rewrite pattern to convert gpu.wait operations into a GPU runtime +/// call. Currently it supports CUDA and ROCm (HIP). +class ConvertWaitOpToGpuRuntimeCallPattern + : public ConvertOpToGpuRuntimeCallPattern { +public: + ConvertWaitOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter) + : ConvertOpToGpuRuntimeCallPattern(typeConverter) {} + +private: + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; + +/// A rewrite pattern to convert gpu.wait async operations into a GPU runtime +/// call. Currently it supports CUDA and ROCm (HIP). +class ConvertWaitAsyncOpToGpuRuntimeCallPattern + : public ConvertOpToGpuRuntimeCallPattern { +public: + ConvertWaitAsyncOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter) + : ConvertOpToGpuRuntimeCallPattern(typeConverter) {} + +private: + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; + /// A rewrite patter to convert gpu.launch_func operations into a sequence of /// GPU runtime calls. Currently it supports CUDA and ROCm (HIP). /// @@ -257,6 +285,69 @@ LogicalResult ConvertHostRegisterOpToGpuRuntimeCallPattern::matchAndRewrite( return success(); } +// Converts `gpu.wait` to runtime calls. The operands are all CUDA or ROCm +// streams (i.e. void*). The converted op synchronizes the host with every +// stream and then destroys it. That is, it assumes that the stream is not used +// afterwards. In case this isn't correct, we will get a runtime error. 
+// Eventually, we will have a pass that guarantees this property. +LogicalResult ConvertWaitOpToGpuRuntimeCallPattern::matchAndRewrite( + Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + if (cast(op).asyncToken()) + return failure(); // The gpu.wait is async. + + Location loc = op->getLoc(); + + for (auto asyncDependency : operands) + streamSynchronizeCallBuilder.create(loc, rewriter, {asyncDependency}); + for (auto asyncDependency : operands) + streamDestroyCallBuilder.create(loc, rewriter, {asyncDependency}); + + rewriter.eraseOp(op); + return success(); +} + +// Converts `gpu.wait async` to runtime calls. The result is a new stream that +// is synchronized with all operands, which are CUDA or ROCm streams (i.e. +// void*). We create and record an event after the definition of the stream +// and make the new stream wait on that event before destroying it again. This +// assumes that there is no other use between the definition and this op, and +// the plan is to have a pass that guarantees this property. +LogicalResult ConvertWaitAsyncOpToGpuRuntimeCallPattern::matchAndRewrite( + Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + if (!cast(op).asyncToken()) + return failure(); // The gpu.wait is not async. + + Location loc = op->getLoc(); + + auto insertionPoint = rewriter.saveInsertionPoint(); + SmallVector events; + for (auto pair : llvm::zip(op->getOperands(), operands)) { + auto token = std::get<0>(pair); + if (auto *defOp = token.getDefiningOp()) { + rewriter.setInsertionPointAfter(defOp); + } else { + // If we can't find the defining op, we record the event at block start, + // which is late and therefore misses parallelism, but still valid. 
+ rewriter.setInsertionPointToStart(op->getBlock()); + } + auto event = eventCreateCallBuilder.create(loc, rewriter, {}).getResult(0); + auto stream = std::get<1>(pair); + eventRecordCallBuilder.create(loc, rewriter, {event, stream}); + events.push_back(event); + } + rewriter.restoreInsertionPoint(insertionPoint); + auto stream = streamCreateCallBuilder.create(loc, rewriter, {}).getResult(0); + for (auto event : events) + streamWaitEventCallBuilder.create(loc, rewriter, {stream, event}); + for (auto event : events) + eventDestroyCallBuilder.create(loc, rewriter, {event}); + rewriter.replaceOp(op, {stream}); + + return success(); +} + // Creates a struct containing all kernel parameters on the stack and returns // an array of type-erased pointers to the fields of the struct. The array can // then be passed to the CUDA / ROCm (HIP) kernel launch calls. @@ -411,7 +502,13 @@ mlir::createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation) { void mlir::populateGpuToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, StringRef gpuBinaryAnnotation) { - patterns.insert(converter); + converter.addConversion( + [context = &converter.getContext()](gpu::AsyncTokenType type) -> Type { + return LLVM::LLVMType::getInt8PtrTy(context); + }); + patterns.insert(converter); patterns.insert( converter, gpuBinaryAnnotation); patterns.insert(&converter.getContext()); diff --git a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir new file mode 100644 index 0000000000000..b6eacfb969dde --- /dev/null +++ b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir @@ -0,0 +1,21 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s --gpu-to-llvm | FileCheck %s + +module attributes {gpu.container_module} { + + func @foo() { + // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate + // CHECK: %[[e0:.*]] = llvm.call @mgpuEventCreate + // CHECK: llvm.call 
@mgpuEventRecord(%[[e0]], %[[t0]]) + %t0 = gpu.wait async + // CHECK: %[[t1:.*]] = llvm.call @mgpuStreamCreate + // CHECK: llvm.call @mgpuStreamWaitEvent(%[[t1]], %[[e0]]) + // CHECK: llvm.call @mgpuEventDestroy(%[[e0]]) + %t1 = gpu.wait async [%t0] + // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]]) + // CHECK: llvm.call @mgpuStreamSynchronize(%[[t1]]) + // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]]) + // CHECK: llvm.call @mgpuStreamDestroy(%[[t1]]) + gpu.wait [%t0, %t1] + return + } +} From f6d7832f4cf8addc0e733df7c7b917959edf7f01 Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Mon, 19 Oct 2020 16:52:51 -0400 Subject: [PATCH 060/179] [AMDGPU] Refactor SOPC & SOPP .td for extension We use the Real vs Pseudo instruction abstraction for other types of instructions to facilitate changes in opcode between gpu generations. This patch introduces that abstraction to SOPC and SOPP. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D89738 Change-Id: I59d53c2c7058b49d05b60350f4062a9b542d3138 --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 10 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 510 +++++++++++++++------- 2 files changed, 351 insertions(+), 169 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 86c54efe34809..4c0e244ceb633 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2557,11 +2557,11 @@ def getVCMPXNoSDstOp : InstrMapping { // Maps a SOPP to a SOPP with S_NOP def getSOPPWithRelaxation : InstrMapping { - let FilterClass = "Base_SOPP"; - let RowFields = ["AsmString"]; - let ColFields = ["Size"]; - let KeyCol = ["4"]; - let ValueCols = [["8"]]; + let FilterClass = "SOPPRelaxTable"; + let RowFields = ["KeyName"]; + let ColFields = ["IsRelaxed"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; } include "SIInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 
76257ed1584be..f92e51d23ea8c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -359,9 +359,9 @@ class SOP2_Pseudo op, SOP_Pseudo ps> : +class SOP2_Real op, SOP_Pseudo ps, string real_name = ps.Mnemonic> : InstSI , + real_name # " " # ps.AsmOperands, []>, Enc32 { let isPseudo = 0; let isCodeGenOnly = 0; @@ -930,88 +930,101 @@ let SubtargetPredicate = isGFX10Plus in { // SOPC Instructions //===----------------------------------------------------------------------===// -class SOPCe op> : Enc32 { - bits<8> src0; - bits<8> src1; - - let Inst{7-0} = src0; - let Inst{15-8} = src1; - let Inst{22-16} = op; - let Inst{31-23} = 0x17e; -} - -class SOPC op, dag outs, dag ins, string asm, - list pattern = []> : - InstSI, SOPCe { +class SOPC_Pseudo pattern=[]> : + SOP_Pseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let SALU = 1; let SOPC = 1; - let isCodeGenOnly = 0; let Defs = [SCC]; let SchedRW = [WriteSALU]; let UseNamedOperandTable = 1; } -class SOPC_Base op, RegisterOperand rc0, RegisterOperand rc1, - string opName, list pattern = []> : SOPC < - op, (outs), (ins rc0:$src0, rc1:$src1), - opName#" $src0, $src1", pattern > { - let Defs = [SCC]; +class SOPC_Real op, SOPC_Pseudo ps, string real_name = ps.Mnemonic> : + InstSI , + Enc32 { + let isPseudo = 0; + let isCodeGenOnly = 0; + + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; + let AsmMatchConverter = ps.AsmMatchConverter; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let TSFlags = ps.TSFlags; + + // encoding + bits<8> src0; + bits<8> src1; + + let Inst{7-0} = src0; + let Inst{15-8} = src1; + let Inst{22-16} = op; + let Inst{31-23} = 0x17e; +} + +class SOPC_Base pattern = []> : SOPC_Pseudo < + opName, (outs), (ins rc0:$src0, rc1:$src1), + "$src0, $src1", pattern > { } -class SOPC_Helper op, RegisterOperand rc, ValueType vt, + +class SOPC_Helper : SOPC_Base < - 
op, rc, rc, opName, + rc, rc, opName, [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > { } -class SOPC_CMP_32 op, string opName, +class SOPC_CMP_32 - : SOPC_Helper, + : SOPC_Helper, Commutable_REV, SOPKInstTable<0, opName> { let isCompare = 1; let isCommutable = 1; } -class SOPC_CMP_64 op, string opName, +class SOPC_CMP_64 - : SOPC_Helper, + : SOPC_Helper, Commutable_REV { let isCompare = 1; let isCommutable = 1; } -class SOPC_32 op, string opName, list pattern = []> - : SOPC_Base; - -class SOPC_64_32 op, string opName, list pattern = []> - : SOPC_Base; - -def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32">; -def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32">; -def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>; -def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>; -def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">; -def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">; -def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>; -def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>; -def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>; -def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>; -def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">; -def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">; - -def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">; -def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">; -def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">; -def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">; +class SOPC_32 pattern = []> + : SOPC_Base; + +class SOPC_64_32 pattern = []> + : SOPC_Base; + +def S_CMP_EQ_I32 : SOPC_CMP_32 <"s_cmp_eq_i32">; +def S_CMP_LG_I32 : SOPC_CMP_32 <"s_cmp_lg_i32">; +def S_CMP_GT_I32 : SOPC_CMP_32 <"s_cmp_gt_i32", COND_SGT>; +def S_CMP_GE_I32 : SOPC_CMP_32 <"s_cmp_ge_i32", COND_SGE>; +def S_CMP_LT_I32 : SOPC_CMP_32 <"s_cmp_lt_i32", 
COND_SLT, "s_cmp_gt_i32">; +def S_CMP_LE_I32 : SOPC_CMP_32 <"s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">; +def S_CMP_EQ_U32 : SOPC_CMP_32 <"s_cmp_eq_u32", COND_EQ>; +def S_CMP_LG_U32 : SOPC_CMP_32 <"s_cmp_lg_u32", COND_NE>; +def S_CMP_GT_U32 : SOPC_CMP_32 <"s_cmp_gt_u32", COND_UGT>; +def S_CMP_GE_U32 : SOPC_CMP_32 <"s_cmp_ge_u32", COND_UGE>; +def S_CMP_LT_U32 : SOPC_CMP_32 <"s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">; +def S_CMP_LE_U32 : SOPC_CMP_32 <"s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">; + +def S_BITCMP0_B32 : SOPC_32 <"s_bitcmp0_b32">; +def S_BITCMP1_B32 : SOPC_32 <"s_bitcmp1_b32">; +def S_BITCMP0_B64 : SOPC_64_32 <"s_bitcmp0_b64">; +def S_BITCMP1_B64 : SOPC_64_32 <"s_bitcmp1_b64">; let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in -def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">; +def S_SETVSKIP : SOPC_32 <"s_setvskip">; let SubtargetPredicate = isGFX8Plus in { -def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>; -def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>; +def S_CMP_EQ_U64 : SOPC_CMP_64 <"s_cmp_eq_u64", COND_EQ>; +def S_CMP_LG_U64 : SOPC_CMP_64 <"s_cmp_lg_u64", COND_NE>; } // End SubtargetPredicate = isGFX8Plus let SubtargetPredicate = HasVGPRIndexMode in { @@ -1019,10 +1032,11 @@ let SubtargetPredicate = HasVGPRIndexMode in { // register. We don't want to add mode register uses to every // instruction, and it's too complicated to deal with anyway. This is // modeled just as a side effect. -def S_SET_GPR_IDX_ON : SOPC <0x11, +def S_SET_GPR_IDX_ON : SOPC_Pseudo < + "s_set_gpr_idx_on" , (outs), (ins SSrc_b32:$src0, GPRIdxMode:$src1), - "s_set_gpr_idx_on $src0,$src1"> { + "$src0,$src1"> { let Defs = [M0, MODE]; // No scc def let Uses = [M0, MODE]; // Other bits of mode, m0 unmodified. 
let hasSideEffects = 1; // Sets mode.gpr_idx_en @@ -1034,225 +1048,237 @@ def S_SET_GPR_IDX_ON : SOPC <0x11, // SOPP Instructions //===----------------------------------------------------------------------===// -class Base_SOPP { - string AsmString = asm; -} - -class SOPPe op> : Enc32 { - bits <16> simm16; - - let Inst{15-0} = simm16; - let Inst{22-16} = op; - let Inst{31-23} = 0x17f; // encoding -} - -class SOPP op, dag ins, string asm, list pattern = []> : - InstSI <(outs), ins, asm, pattern >, SOPPe , Base_SOPP { - +class SOPP_Pseudo pattern=[], string keyName = opName> : + SOP_Pseudo { + let isPseudo = 1; + let isCodeGenOnly = 1; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let SALU = 1; let SOPP = 1; - let Size = 4; let SchedRW = [WriteSALU]; - let UseNamedOperandTable = 1; + bits <16> simm16; + bits <1> fixed_imm = 0; + string KeyName = keyName; +} + +class SOPPRelaxTable { + bit IsRelaxed = isRelaxed; + string KeyName = keyName # gfxip; } -def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">; +//spaces inserted in realname on instantiation of this record to allow s_endpgm to omit whitespace +class SOPP_Real op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : + InstSI { + let isPseudo = 0; + let isCodeGenOnly = 0; -class SOPP_w_nop_e op> : Enc64 { + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; + let AsmMatchConverter = ps.AsmMatchConverter; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let TSFlags = ps.TSFlags; bits <16> simm16; +} - let Inst{15-0} = simm16; +class SOPP_Real_32 op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real, +Enc32 { + let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16); let Inst{22-16} = op; - let Inst{31-23} = 0x17f; // encoding - let Inst{47-32} = 0x0; - let Inst{54-48} = S_NOP.Inst{22-16}; // opcode - let Inst{63-55} = S_NOP.Inst{31-23}; // encoding + let Inst{31-23} = 0x17f; } -class SOPP_w_nop 
op, dag ins, string asm, list pattern = []> : - InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e , Base_SOPP { - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let SALU = 1; - let SOPP = 1; - let Size = 8; - let SchedRW = [WriteSALU]; - - let UseNamedOperandTable = 1; +class SOPP_Real_64 op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real, +Enc64 { + // encoding + let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16); + let Inst{22-16} = op; + let Inst{31-23} = 0x17f; + //effectively a nop + let Inst{47-32} = 0x0; + let Inst{54-48} = 0x0; + let Inst{63-55} = 0x17f; } -multiclass SOPP_With_Relaxation op, dag ins, string asm, list pattern = []> { - def "" : SOPP ; - def _pad_s_nop : SOPP_w_nop ; +multiclass SOPP_With_Relaxation pattern=[]> { + def "" : SOPP_Pseudo ; + def _pad_s_nop : SOPP_Pseudo ; } -let isTerminator = 1 in { +def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16">; -def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm$simm16"> { +let isTerminator = 1 in { +def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16"> { let isBarrier = 1; let isReturn = 1; } -def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> { +def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> { let SubtargetPredicate = isGFX8Plus; let simm16 = 0; + let fixed_imm = 1; let isBarrier = 1; let isReturn = 1; } let SubtargetPredicate = isGFX9Plus in { - let isBarrier = 1, isReturn = 1, simm16 = 0 in { + let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in { def S_ENDPGM_ORDERED_PS_DONE : - SOPP<0x01e, (ins), "s_endpgm_ordered_ps_done">; - } // End isBarrier = 1, isReturn = 1, simm16 = 0 + SOPP_Pseudo<"s_endpgm_ordered_ps_done", (ins)>; + } // End isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 } // End SubtargetPredicate = isGFX9Plus let SubtargetPredicate = isGFX10Plus in { - let isBarrier = 1, isReturn = 1, simm16 = 0 in { + let isBarrier = 1, isReturn = 1, simm16 = 0, 
fixed_imm = 1 in { def S_CODE_END : - SOPP<0x01f, (ins), "s_code_end">; - } // End isBarrier = 1, isReturn = 1, simm16 = 0 + SOPP_Pseudo<"s_code_end", (ins)>; + } // End isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 } // End SubtargetPredicate = isGFX10Plus let isBranch = 1, SchedRW = [WriteBranch] in { let isBarrier = 1 in { -defm S_BRANCH : SOPP_With_Relaxation < - 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16", +defm S_BRANCH : SOPP_With_Relaxation< + "s_branch" , (ins sopp_brtarget:$simm16), "$simm16", [(br bb:$simm16)]>; } let Uses = [SCC] in { -defm S_CBRANCH_SCC0 : SOPP_With_Relaxation < - 0x00000004, (ins sopp_brtarget:$simm16), - "s_cbranch_scc0 $simm16" +defm S_CBRANCH_SCC0 : SOPP_With_Relaxation< + "s_cbranch_scc0" , (ins sopp_brtarget:$simm16), + "$simm16" >; defm S_CBRANCH_SCC1 : SOPP_With_Relaxation < - 0x00000005, (ins sopp_brtarget:$simm16), - "s_cbranch_scc1 $simm16" + "s_cbranch_scc1" , (ins sopp_brtarget:$simm16), + "$simm16" >; } // End Uses = [SCC] let Uses = [VCC] in { defm S_CBRANCH_VCCZ : SOPP_With_Relaxation < - 0x00000006, (ins sopp_brtarget:$simm16), - "s_cbranch_vccz $simm16" + "s_cbranch_vccz" , (ins sopp_brtarget:$simm16), + "$simm16" >; defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation < - 0x00000007, (ins sopp_brtarget:$simm16), - "s_cbranch_vccnz $simm16" + "s_cbranch_vccnz" , (ins sopp_brtarget:$simm16), + "$simm16" >; } // End Uses = [VCC] let Uses = [EXEC] in { defm S_CBRANCH_EXECZ : SOPP_With_Relaxation < - 0x00000008, (ins sopp_brtarget:$simm16), - "s_cbranch_execz $simm16" + "s_cbranch_execz" , (ins sopp_brtarget:$simm16), + "$simm16" >; defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation < - 0x00000009, (ins sopp_brtarget:$simm16), - "s_cbranch_execnz $simm16" + "s_cbranch_execnz" , (ins sopp_brtarget:$simm16), + "$simm16" >; } // End Uses = [EXEC] defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation < - 0x00000017, (ins sopp_brtarget:$simm16), - "s_cbranch_cdbgsys $simm16" + "s_cbranch_cdbgsys" , (ins 
sopp_brtarget:$simm16), + "$simm16" >; defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation < - 0x0000001A, (ins sopp_brtarget:$simm16), - "s_cbranch_cdbgsys_and_user $simm16" + "s_cbranch_cdbgsys_and_user" , (ins sopp_brtarget:$simm16), + "$simm16" >; defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation < - 0x00000019, (ins sopp_brtarget:$simm16), - "s_cbranch_cdbgsys_or_user $simm16" + "s_cbranch_cdbgsys_or_user" , (ins sopp_brtarget:$simm16), + "$simm16" >; defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation < - 0x00000018, (ins sopp_brtarget:$simm16), - "s_cbranch_cdbguser $simm16" + "s_cbranch_cdbguser" , (ins sopp_brtarget:$simm16), + "$simm16" >; } // End isBranch = 1 } // End isTerminator = 1 let hasSideEffects = 1 in { -def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier", +def S_BARRIER : SOPP_Pseudo <"s_barrier", (ins), "", [(int_amdgcn_s_barrier)]> { let SchedRW = [WriteBarrier]; let simm16 = 0; + let fixed_imm = 1; let isConvergent = 1; } -def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> { +def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > { let SubtargetPredicate = isGFX8Plus; let simm16 = 0; + let fixed_imm = 1; let mayLoad = 1; let mayStore = 1; } let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16", +def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins WAIT_FLAG:$simm16), "$simm16", [(int_amdgcn_s_waitcnt timm:$simm16)]>; -def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">; -def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; +def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i16imm:$simm16), "$simm16">; +def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">; // On SI the documentation says sleep for approximately 64 * low 2 // bits, consistent with the reported maximum of 448. On VI the // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the // maximum really 15 on VI? 
-def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16), - "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> { +def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16), + "$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 0; let mayStore = 0; } -def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">; +def S_SETPRIO : SOPP_Pseudo <"s_setprio" , (ins i16imm:$simm16), "$simm16">; let Uses = [EXEC, M0] in { // FIXME: Should this be mayLoad+mayStore? -def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", +def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsgImm:$simm16), "$simm16", [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>; -def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", +def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsgImm:$simm16), "$simm16", [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>; } // End Uses = [EXEC, M0] -def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> { +def S_TRAP : SOPP_Pseudo <"s_trap" , (ins i16imm:$simm16), "$simm16"> { let isTrap = 1; } -def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { +def S_ICACHE_INV : SOPP_Pseudo <"s_icache_inv", (ins)> { let simm16 = 0; + let fixed_imm = 1; } -def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16", +def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm16", [(int_amdgcn_s_incperflevel timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 0; let mayStore = 0; } -def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16", +def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16", [(int_amdgcn_s_decperflevel timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 0; let mayStore = 0; } -def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> { +def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> { 
let simm16 = 0; + let fixed_imm = 1; } let SubtargetPredicate = HasVGPRIndexMode in { -def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> { +def S_SET_GPR_IDX_OFF : SOPP_Pseudo<"s_set_gpr_idx_off", (ins) > { let simm16 = 0; + let fixed_imm = 1; let Defs = [MODE]; let Uses = [MODE]; } @@ -1260,8 +1286,9 @@ def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> { } // End hasSideEffects let SubtargetPredicate = HasVGPRIndexMode in { -def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16), - "s_set_gpr_idx_mode$simm16"> { +def S_SET_GPR_IDX_MODE : SOPP_Pseudo<"s_set_gpr_idx_mode", (ins GPRIdxMode:$simm16), + "$simm16"> { + /*"s_set_gpr_idx_mode$simm16"> {*/ let Defs = [M0, MODE]; let Uses = [MODE]; } @@ -1269,26 +1296,27 @@ def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16), let SubtargetPredicate = isGFX10Plus in { def S_INST_PREFETCH : - SOPP<0x020, (ins s16imm:$simm16), "s_inst_prefetch $simm16">; + SOPP_Pseudo<"s_inst_prefetch", (ins s16imm:$simm16), "$simm16">; def S_CLAUSE : - SOPP<0x021, (ins s16imm:$simm16), "s_clause $simm16">; + SOPP_Pseudo<"s_clause", (ins s16imm:$simm16), "$simm16">; def S_WAITCNT_IDLE : - SOPP <0x022, (ins), "s_wait_idle"> { + SOPP_Pseudo <"s_wait_idle", (ins), ""> { let simm16 = 0; + let fixed_imm = 1; } def S_WAITCNT_DEPCTR : - SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">; + SOPP_Pseudo <"s_waitcnt_depctr" , (ins s16imm:$simm16), "$simm16">; let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in { def S_ROUND_MODE : - SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; + SOPP_Pseudo<"s_round_mode", (ins s16imm:$simm16), "$simm16">; def S_DENORM_MODE : - SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", + SOPP_Pseudo<"s_denorm_mode", (ins i32imm:$simm16), "$simm16", [(SIdenorm_mode (i32 timm:$simm16))]>; } def S_TTRACEDATA_IMM : - SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">; + SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">; } // End 
SubtargetPredicate = isGFX10Plus //===----------------------------------------------------------------------===// @@ -1379,15 +1407,25 @@ def : ScalarNot2Pat; // Target-specific instruction encodings. //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// SOP1 - GFX10. -//===----------------------------------------------------------------------===// - class Select_gfx10 : SIMCInstr { Predicate AssemblerPredicate = isGFX10Plus; string DecoderNamespace = "GFX10"; } +class Select_vi : SIMCInstr { + Predicate AssemblerPredicate = isGFX8GFX9; + string DecoderNamespace = "GFX8"; +} + +class Select_gfx6_gfx7 : SIMCInstr { + Predicate AssemblerPredicate = isGFX6GFX7; + string DecoderNamespace = "GFX6GFX7"; +} + +//===----------------------------------------------------------------------===// +// SOP1 - GFX10. +//===----------------------------------------------------------------------===// + multiclass SOP1_Real_gfx10 op> { def _gfx10 : SOP1_Real(NAME)>, Select_gfx10(NAME).Mnemonic>; @@ -1416,10 +1454,6 @@ defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>; // SOP1 - GFX6, GFX7. //===----------------------------------------------------------------------===// -class Select_gfx6_gfx7 : SIMCInstr { - Predicate AssemblerPredicate = isGFX6GFX7; - string DecoderNamespace = "GFX6GFX7"; -} multiclass SOP1_Real_gfx6_gfx7 op> { def _gfx6_gfx7 : SOP1_Real(NAME)>, @@ -1621,15 +1655,163 @@ defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>; defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>; //===----------------------------------------------------------------------===// -// GFX8 (VI), GFX9. 
+// SOPP - GFX6, GFX7, GFX8, GFX9, GFX10 //===----------------------------------------------------------------------===// -class Select_vi : - SIMCInstr { - Predicate AssemblerPredicate = isGFX8GFX9; - string DecoderNamespace = "GFX8"; +multiclass SOPP_Real_32_gfx6_gfx7 op, string real_name = !cast(NAME).Mnemonic> { + def _gfx6_gfx7 : SOPP_Real_32(NAME), real_name>, + Select_gfx6_gfx7(NAME).Mnemonic>, + SOPPRelaxTable<0, !cast(NAME).KeyName, "_gfx6_gfx7">; +} + +multiclass SOPP_Real_32_gfx8_gfx9 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _vi : SOPP_Real_32(NAME), real_name>, + Select_vi(NAME).Mnemonic>, + SOPPRelaxTable<0, !cast(NAME).KeyName, "_vi">; +} + +multiclass SOPP_Real_32_gfx10 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _gfx10 : SOPP_Real_32(NAME), real_name>, + Select_gfx10(NAME).Mnemonic>, + SOPPRelaxTable<0, !cast(NAME).KeyName, "_gfx10">; +} + +multiclass SOPP_Real_32_gfx8_gfx9_gfx10 op, string real_name = !cast(NAME).Mnemonic # " "> : + SOPP_Real_32_gfx8_gfx9, SOPP_Real_32_gfx10; + +multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9 op, string real_name = !cast(NAME).Mnemonic # " "> : + SOPP_Real_32_gfx6_gfx7, SOPP_Real_32_gfx8_gfx9; + +multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10 op, string real_name = !cast(NAME).Mnemonic # " "> : + SOPP_Real_32_gfx6_gfx7_gfx8_gfx9, SOPP_Real_32_gfx10; + +//64 bit encodings, for Relaxation +multiclass SOPP_Real_64_gfx6_gfx7 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _gfx6_gfx7 : SOPP_Real_64(NAME), real_name>, + Select_gfx6_gfx7(NAME).Mnemonic>, + SOPPRelaxTable<1, !cast(NAME).KeyName, "_gfx6_gfx7">; +} + +multiclass SOPP_Real_64_gfx8_gfx9 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _vi : SOPP_Real_64(NAME), real_name>, + Select_vi(NAME).Mnemonic>, + SOPPRelaxTable<1, !cast(NAME).KeyName, "_vi">; +} + +multiclass SOPP_Real_64_gfx10 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _gfx10 : SOPP_Real_64(NAME), real_name>, + 
Select_gfx10(NAME).Mnemonic>, + SOPPRelaxTable<1, !cast(NAME).KeyName, "_gfx10">; +} + +multiclass SOPP_Real_64_gfx8_gfx9_gfx10 op, string real_name = !cast(NAME).Mnemonic # " "> : + SOPP_Real_64_gfx8_gfx9, SOPP_Real_64_gfx10; + +multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9 op, string real_name = !cast(NAME).Mnemonic # " "> : + SOPP_Real_64_gfx6_gfx7, SOPP_Real_64_gfx8_gfx9; + +multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10 op, string real_name = !cast(NAME).Mnemonic # " "> : + SOPP_Real_64_gfx6_gfx7_gfx8_gfx9, SOPP_Real_64_gfx10; + +//relaxation for insts with no operands not implemented +multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10 op> { + defm "" : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10; + defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10; +} + +defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x000>; +defm S_ENDPGM : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x001, "s_endpgm">; +defm S_BRANCH : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x002>; +defm S_WAKEUP : SOPP_Real_32_gfx8_gfx9_gfx10<0x003>; +defm S_CBRANCH_SCC0 : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x004>; +defm S_CBRANCH_SCC1 : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x005>; +defm S_CBRANCH_VCCZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x006>; +defm S_CBRANCH_VCCNZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x007>; +defm S_CBRANCH_EXECZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x008>; +defm S_CBRANCH_EXECNZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x009>; +defm S_CBRANCH_CDBGSYS : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x017>; +defm S_CBRANCH_CDBGUSER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x018>; +defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x019>; +defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x01A>; +defm S_BARRIER : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00a>; +defm S_WAITCNT : 
SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00c>; +defm S_SETHALT : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00d>; +defm S_SETKILL : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00b>; +defm S_SLEEP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00e>; +defm S_SETPRIO : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00f>; +defm S_SENDMSG : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x010>; +defm S_SENDMSGHALT : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x011>; +defm S_TRAP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x012>; +defm S_ICACHE_INV : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x013>; +defm S_INCPERFLEVEL : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x014>; +defm S_DECPERFLEVEL : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x015>; +defm S_TTRACEDATA : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x016>; +defm S_ENDPGM_SAVED : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x01B>; +defm S_SET_GPR_IDX_OFF : SOPP_Real_32_gfx8_gfx9<0x01c>; +defm S_SET_GPR_IDX_MODE : SOPP_Real_32_gfx8_gfx9<0x01d>; +defm S_ENDPGM_ORDERED_PS_DONE : SOPP_Real_32_gfx8_gfx9_gfx10<0x01e>; +defm S_CODE_END : SOPP_Real_32_gfx10<0x01f>; +defm S_INST_PREFETCH : SOPP_Real_32_gfx10<0x020>; +defm S_CLAUSE : SOPP_Real_32_gfx10<0x021>; +defm S_WAITCNT_IDLE : SOPP_Real_32_gfx10<0x022>; +defm S_WAITCNT_DEPCTR : SOPP_Real_32_gfx10<0x023>; +defm S_ROUND_MODE : SOPP_Real_32_gfx10<0x024>; +defm S_DENORM_MODE : SOPP_Real_32_gfx10<0x025>; +defm S_TTRACEDATA_IMM : SOPP_Real_32_gfx10<0x028>; + +//===----------------------------------------------------------------------===// +// SOPC - GFX6, GFX7, GFX8, GFX9, GFX10 +//===----------------------------------------------------------------------===// + +multiclass SOPC_Real_gfx6_gfx7 op> { + def _gfx6_gfx7 : SOPC_Real(NAME)>, + Select_gfx6_gfx7(NAME).Mnemonic>; +} + +multiclass SOPC_Real_gfx8_gfx9 op> { + def _vi : SOPC_Real(NAME)>, + Select_vi(NAME).Mnemonic>; } +multiclass SOPC_Real_gfx10 op> { + def _gfx10 : SOPC_Real(NAME)>, + Select_gfx10(NAME).Mnemonic>; +} + +multiclass SOPC_Real_gfx8_gfx9_gfx10 op> : + 
SOPC_Real_gfx8_gfx9, SOPC_Real_gfx10; + +multiclass SOPC_Real_gfx6_gfx7_gfx8_gfx9 op> : + SOPC_Real_gfx6_gfx7, SOPC_Real_gfx8_gfx9; + +multiclass SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10 op> : + SOPC_Real_gfx6_gfx7_gfx8_gfx9, SOPC_Real_gfx10; + +defm S_CMP_EQ_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x00>; +defm S_CMP_LG_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x01>; +defm S_CMP_GT_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x02>; +defm S_CMP_GE_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x03>; +defm S_CMP_LT_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x04>; +defm S_CMP_LE_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x05>; +defm S_CMP_EQ_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x06>; +defm S_CMP_LG_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x07>; +defm S_CMP_GT_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x08>; +defm S_CMP_GE_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x09>; +defm S_CMP_LT_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0a>; +defm S_CMP_LE_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0b>; +defm S_BITCMP0_B32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0c>; +defm S_BITCMP1_B32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0d>; +defm S_BITCMP0_B64 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0e>; +defm S_BITCMP1_B64 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0f>; +defm S_SETVSKIP : SOPC_Real_gfx6_gfx7_gfx8_gfx9<0x10>; +defm S_SET_GPR_IDX_ON : SOPC_Real_gfx8_gfx9<0x11>; +defm S_CMP_EQ_U64 : SOPC_Real_gfx8_gfx9_gfx10<0x12>; +defm S_CMP_LG_U64 : SOPC_Real_gfx8_gfx9_gfx10<0x13>; + +//===----------------------------------------------------------------------===// +// GFX8 (VI), GFX9. 
+//===----------------------------------------------------------------------===// + class SOP1_Real_vi op, SOP1_Pseudo ps> : SOP1_Real, Select_vi; From ba60de5250ce1c4baa4a7bb7098ac67349f88a99 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 21 Oct 2020 17:34:47 +0100 Subject: [PATCH 061/179] Use -### in arm-float-abi.c test This is needed to prevent the test from failing when llvm is configured so that the arm target is not present, which is the case for some buildbots. --- clang/test/Driver/arm-float-abi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/arm-float-abi.c b/clang/test/Driver/arm-float-abi.c index 294f024447695..806ef0fbd93e6 100644 --- a/clang/test/Driver/arm-float-abi.c +++ b/clang/test/Driver/arm-float-abi.c @@ -20,9 +20,9 @@ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-ERROR %s // CHECK-ANDROID-ERROR: unsupported option '-mfloat-abi=hard' for target 'armv7-unknown-linux-android21' -// RUN: %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=soft 2>&1 \ +// RUN: %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=soft -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-NOERROR %s -// RUN: %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=softfp 2>&1 \ +// RUN: %clang -target armv7-linux-androideabi21 %s -S -o - -mfloat-abi=softfp -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-NOERROR %s // CHECK-ANDROID-NOERROR-NOT: unsupported option @@ -34,6 +34,6 @@ // RUN: | FileCheck --check-prefix=CHECK-WATCHOS-ERROR2 %s // CHECK-WATCHOS-ERROR2: unsupported option '-mfloat-abi=softfp' for target 'thumbv7-apple-watchos4' -// RUN: %clang -target armv7-apple-watchos4 %s -S -o - -mfloat-abi=hard 2>&1 \ +// RUN: %clang -target armv7-apple-watchos4 %s -S -o - -mfloat-abi=hard -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-WATCHOS-NOERROR %s // CHECK-WATCHOS-NOERROR-NOT: unsupported option From 4a8b52b53de640022a7eca0dbcefa61802cf63b0 Mon Sep 17 
00:00:00 2001 From: Nico Weber Date: Wed, 21 Oct 2020 12:43:01 -0400 Subject: [PATCH 062/179] [gn build] (manually) port 54fa9ecd3088 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index e30622f52195f..bf1da70220fb7 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -80,6 +80,7 @@ copy("include") { "any", "array", "atomic", + "barrier", "bit", "bitset", "cassert", @@ -157,6 +158,7 @@ copy("include") { "iostream", "istream", "iterator", + "latch", "limits", "limits.h", "list", @@ -176,6 +178,7 @@ copy("include") { "ratio", "regex", "scoped_allocator", + "semaphore", "set", "setjmp.h", "shared_mutex", From 37c030f81a9fdd7a7e1b6fa5407b277c1ab1afa1 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 21 Oct 2020 12:50:22 -0400 Subject: [PATCH 063/179] [gn build] (manually) port 5d796645d6c8cade (libcxx __config change) --- .../gn/secondary/libcxx/include/BUILD.gn | 474 +++++++++--------- llvm/utils/gn/secondary/libcxx/src/BUILD.gn | 9 +- .../utils/gn/secondary/libcxxabi/src/BUILD.gn | 7 +- 3 files changed, 252 insertions(+), 238 deletions(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index bf1da70220fb7..f53304df645c9 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1,4 +1,3 @@ -import("//clang/resource_dir.gni") import("//libcxx/config.gni") import("//llvm/utils/gn/build/write_cmake_config.gni") @@ -7,252 +6,267 @@ declare_args() { libcxx_install_support_headers = true } -libcxx_needs_site_config = - libcxx_abi_version != 1 || libcxx_abi_namespace != "" || libcxx_abi_unstable +libcxx_generated_include_dir = "$root_build_dir/include/c++/v1" -if (libcxx_needs_site_config) { - write_cmake_config("write_config") { +# 
This is a bit weird. For now, we assume that __config_site is identical +# in all toolchains, and only copy it (and all other libcxx headers) +# to 'include' in the root build dir, so that it's the same for all toolchains. +# Maybe we wnt to make this per-toolchain eventually (and then use root_out_dir +# in libcxx_generated_include_dir) -- e.g. for cross-builds that for example +# use for-linux-configured libc++ for the host build but for-windows-configured +# libc++ for the target build. +if (current_toolchain == default_toolchain) { + write_cmake_config("write_config_site") { input = "__config_site.in" - output = "$target_gen_dir/__config_site" + output = "$libcxx_generated_include_dir/__config_site" - values = [] + values = [ + "_LIBCPP_ABI_FORCE_ITANIUM=", + "_LIBCPP_ABI_FORCE_MICROSOFT=", + "_LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT=", + "_LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE=", + "_LIBCPP_HAS_NO_STDIN=", + "_LIBCPP_HAS_NO_STDOUT=", + "_LIBCPP_HAS_NO_THREADS=", + "_LIBCPP_HAS_NO_MONOTONIC_CLOCK=", + "_LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS=", + "_LIBCPP_HAS_MUSL_LIBC=", + "_LIBCPP_HAS_THREAD_API_PTHREAD=", + "_LIBCPP_HAS_THREAD_API_EXTERNAL=", + "_LIBCPP_HAS_THREAD_API_WIN32=", + "_LIBCPP_HAS_THREAD_LIBRARY_EXTERNAL=", + "_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS=", + "_LIBCPP_NO_VCRUNTIME=", + "_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=", + "_LIBCPP_HAS_PARALLEL_ALGORITHMS=", + "_LIBCPP_HAS_NO_RANDOM_DEVICE=", + "_LIBCPP_ABI_DEFINES=", + ] if (libcxx_abi_version != 1) { values += [ "_LIBCPP_ABI_VERSION=$libcxx_abi_version" ] + } else { + values += [ "_LIBCPP_ABI_VERSION=" ] } - if (libcxx_abi_namespace != "") { - values += [ "_LIBCPP_ABI_NAMESPACE=$libcxx_abi_namespace" ] - } + values += [ "_LIBCPP_ABI_NAMESPACE=$libcxx_abi_namespace" ] if (libcxx_abi_unstable) { values += [ "_LIBCPP_ABI_UNSTABLE=1" ] + } else { + values += [ "_LIBCPP_ABI_UNSTABLE=" ] } } - # Generate a custom __config header. 
The new header is created - # by prepending __config_site to the current __config header. - action("concat_config") { - script = "//libcxx/utils/cat_files.py" - inputs = [ - "$target_gen_dir/__config_site", - "__config", - ] - outputs = [ "$target_gen_dir/__config" ] - args = [ - "$target_gen_dir/__config_site", + copy("copy_headers") { + sources = [ + "__bit_reference", + "__bsd_locale_defaults.h", + "__bsd_locale_fallbacks.h", "__config", - "-o", - "$target_gen_dir/__config", + "__debug", + "__errc", + "__functional_03", + "__functional_base", + "__functional_base_03", + "__hash_table", + "__libcpp_version", + "__locale", + "__mutex_base", + "__node_handle", + "__nullptr", + "__split_buffer", + "__sso_allocator", + "__std_stream", + "__string", + "__threading_support", + "__tree", + "__tuple", + "__undef_macros", + "algorithm", + "any", + "array", + "atomic", + "barrier", + "bit", + "bitset", + "cassert", + "ccomplex", + "cctype", + "cerrno", + "cfenv", + "cfloat", + "charconv", + "chrono", + "cinttypes", + "ciso646", + "climits", + "clocale", + "cmath", + "codecvt", + "compare", + "complex", + "complex.h", + "condition_variable", + "csetjmp", + "csignal", + "cstdarg", + "cstdbool", + "cstddef", + "cstdint", + "cstdio", + "cstdlib", + "cstring", + "ctgmath", + "ctime", + "ctype.h", + "cwchar", + "cwctype", + "deque", + "errno.h", + "exception", + "experimental/__config", + "experimental/__memory", + "experimental/algorithm", + "experimental/coroutine", + "experimental/deque", + "experimental/filesystem", + "experimental/forward_list", + "experimental/functional", + "experimental/iterator", + "experimental/list", + "experimental/map", + "experimental/memory_resource", + "experimental/propagate_const", + "experimental/regex", + "experimental/set", + "experimental/simd", + "experimental/string", + "experimental/type_traits", + "experimental/unordered_map", + "experimental/unordered_set", + "experimental/utility", + "experimental/vector", + "ext/__hash", + 
"ext/hash_map", + "ext/hash_set", + "fenv.h", + "filesystem", + "float.h", + "forward_list", + "fstream", + "functional", + "future", + "initializer_list", + "inttypes.h", + "iomanip", + "ios", + "iosfwd", + "iostream", + "istream", + "iterator", + "latch", + "limits", + "limits.h", + "list", + "locale", + "locale.h", + "map", + "math.h", + "memory", + "module.modulemap", + "mutex", + "new", + "numeric", + "optional", + "ostream", + "queue", + "random", + "ratio", + "regex", + "scoped_allocator", + "semaphore", + "set", + "setjmp.h", + "shared_mutex", + "span", + "sstream", + "stack", + "stdbool.h", + "stddef.h", + "stdexcept", + "stdint.h", + "stdio.h", + "stdlib.h", + "streambuf", + "string", + "string.h", + "string_view", + "strstream", + "system_error", + "tgmath.h", + "thread", + "tuple", + "type_traits", + "typeindex", + "typeinfo", + "unordered_map", + "unordered_set", + "utility", + "valarray", + "variant", + "vector", + "version", + "wchar.h", + "wctype.h", ] - deps = [ ":write_config" ] + deps = [] + if (target_os != "mac" && target_os != "win") { + # libcxx/cmake/Modules/HandleLibCXXABI.cmake sets + # LIBCXX_CXX_ABI_HEADER_TARGET if the libcxx abi library either of + # "libstdc++", "libsupc++", "libcxxabi", "libcxxrt", but not if it's "none", + # "default", or "vcruntime". So on Windows, these don't get copied due to + # LIBCXX_CXX_ABI_HEADER_TARGET not being set. + # On macOS, libcxx/CMakeLists.txt sets LIBCXX_CXX_ABI_SYSTEM to 1, which + # causes an empty header list to be passed to setup_abi_lib, so these + # don't get copied on macOS due to that. 
+ deps += [ "//libcxxabi/include" ] + } + if (libcxx_install_support_headers) { + sources += [ + "support/android/locale_bionic.h", + "support/fuchsia/xlocale.h", + "support/ibm/limits.h", + "support/ibm/locale_mgmt_aix.h", + "support/ibm/support.h", + "support/ibm/xlocale.h", + "support/musl/xlocale.h", + "support/newlib/xlocale.h", + "support/solaris/floatingpoint.h", + "support/solaris/wchar.h", + "support/solaris/xlocale.h", + "support/xlocale/__nop_locale_mgmt.h", + "support/xlocale/__posix_l_fallback.h", + "support/xlocale/__strtonum_fallback.h", + ] + if (target_os == "win") { + sources += [ + "support/win32/limits_msvc_win32.h", + "support/win32/locale_win32.h", + ] + } + } + outputs = [ "$libcxx_generated_include_dir/{{source_target_relative}}" ] } +} - copy("copy_config") { - sources = [ "$target_gen_dir/__config" ] - outputs = [ "$clang_resource_dir/include/c++/v1/{{source_file_part}}" ] - deps = [ ":concat_config" ] - } +config("include_config") { + include_dirs = [ libcxx_generated_include_dir ] } -copy("include") { - sources = [ - "__bit_reference", - "__bsd_locale_defaults.h", - "__bsd_locale_fallbacks.h", - "__debug", - "__errc", - "__functional_03", - "__functional_base", - "__functional_base_03", - "__hash_table", - "__libcpp_version", - "__locale", - "__mutex_base", - "__node_handle", - "__nullptr", - "__split_buffer", - "__sso_allocator", - "__std_stream", - "__string", - "__threading_support", - "__tree", - "__tuple", - "__undef_macros", - "algorithm", - "any", - "array", - "atomic", - "barrier", - "bit", - "bitset", - "cassert", - "ccomplex", - "cctype", - "cerrno", - "cfenv", - "cfloat", - "charconv", - "chrono", - "cinttypes", - "ciso646", - "climits", - "clocale", - "cmath", - "codecvt", - "compare", - "complex", - "complex.h", - "condition_variable", - "csetjmp", - "csignal", - "cstdarg", - "cstdbool", - "cstddef", - "cstdint", - "cstdio", - "cstdlib", - "cstring", - "ctgmath", - "ctime", - "ctype.h", - "cwchar", - "cwctype", - "deque", - 
"errno.h", - "exception", - "experimental/__config", - "experimental/__memory", - "experimental/algorithm", - "experimental/coroutine", - "experimental/deque", - "experimental/filesystem", - "experimental/forward_list", - "experimental/functional", - "experimental/iterator", - "experimental/list", - "experimental/map", - "experimental/memory_resource", - "experimental/propagate_const", - "experimental/regex", - "experimental/set", - "experimental/simd", - "experimental/string", - "experimental/type_traits", - "experimental/unordered_map", - "experimental/unordered_set", - "experimental/utility", - "experimental/vector", - "ext/__hash", - "ext/hash_map", - "ext/hash_set", - "fenv.h", - "filesystem", - "float.h", - "forward_list", - "fstream", - "functional", - "future", - "initializer_list", - "inttypes.h", - "iomanip", - "ios", - "iosfwd", - "iostream", - "istream", - "iterator", - "latch", - "limits", - "limits.h", - "list", - "locale", - "locale.h", - "map", - "math.h", - "memory", - "module.modulemap", - "mutex", - "new", - "numeric", - "optional", - "ostream", - "queue", - "random", - "ratio", - "regex", - "scoped_allocator", - "semaphore", - "set", - "setjmp.h", - "shared_mutex", - "span", - "sstream", - "stack", - "stdbool.h", - "stddef.h", - "stdexcept", - "stdint.h", - "stdio.h", - "stdlib.h", - "streambuf", - "string", - "string.h", - "string_view", - "strstream", - "system_error", - "tgmath.h", - "thread", - "tuple", - "type_traits", - "typeindex", - "typeinfo", - "unordered_map", - "unordered_set", - "utility", - "valarray", - "variant", - "vector", - "version", - "wchar.h", - "wctype.h", - ] - deps = [] - if (target_os != "mac" && target_os != "win") { - # libcxx/cmake/Modules/HandleLibCXXABI.cmake sets - # LIBCXX_CXX_ABI_HEADER_TARGET if the libcxx abi library either of - # "libstdc++", "libsupc++", "libcxxabi", "libcxxrt", but not if it's "none", - # "default", or "vcruntime". 
So on Windows, these don't get copied due to - # LIBCXX_CXX_ABI_HEADER_TARGET not being set. - # On macOS, libcxx/CMakeLists.txt sets LIBCXX_CXX_ABI_SYSTEM to 1, which - # causes an empty header list to be passed to setup_abi_lib, so these - # don't get copied on macOS due to that. - deps += [ "//libcxxabi/include" ] - } - if (!libcxx_needs_site_config) { - sources += [ "__config" ] - } else { - deps += [ ":copy_config" ] - } - if (libcxx_install_support_headers) { - sources += [ - "support/android/locale_bionic.h", - "support/fuchsia/xlocale.h", - "support/ibm/limits.h", - "support/ibm/locale_mgmt_aix.h", - "support/ibm/support.h", - "support/ibm/xlocale.h", - "support/musl/xlocale.h", - "support/newlib/xlocale.h", - "support/solaris/floatingpoint.h", - "support/solaris/wchar.h", - "support/solaris/xlocale.h", - "support/xlocale/__nop_locale_mgmt.h", - "support/xlocale/__posix_l_fallback.h", - "support/xlocale/__strtonum_fallback.h", +group("include") { + if (current_toolchain == default_toolchain) { + deps = [ + ":copy_headers", + ":write_config_site", ] - if (target_os == "win") { - sources += [ - "support/win32/limits_msvc_win32.h", - "support/win32/locale_win32.h", - ] - } } - outputs = [ "$root_build_dir/include/c++/v1/{{source_target_relative}}" ] + public_configs = [ ":include_config" ] } diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 71668b65f72f5..d4b43d6808c75 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -1,4 +1,5 @@ import("//clang/runtimes.gni") +import("//libcxx/config.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") declare_args() { @@ -37,10 +38,7 @@ declare_args() { } config("cxx_config") { - include_dirs = [ - "//libcxxabi/include", - "//libcxx/include", - ] + include_dirs = [ "//libcxxabi/include" ] cflags = [ "-Wall", "-Wextra", @@ -203,6 +201,7 @@ if (libcxx_enable_shared) { sources = cxx_sources deps = [ 
"//compiler-rt/lib/builtins", + "//libcxx/include", "//libcxxabi/src:cxxabi_shared", "//libunwind/src:unwind_shared", ] @@ -252,6 +251,7 @@ if (libcxx_enable_static) { } deps = [ "//compiler-rt/lib/builtins", + "//libcxx/include", "//libcxxabi/src:cxxabi_static", "//libunwind/src:unwind_static", ] @@ -268,6 +268,7 @@ if (libcxx_enable_experimental) { output_dir = runtimes_dir output_name = "c++experimental" sources = [ "experimental/memory_resource.cpp" ] + deps = [ "//libcxx/include" ] configs += [ ":cxx_config" ] configs -= [ "//llvm/utils/gn/build:no_exceptions", diff --git a/llvm/utils/gn/secondary/libcxxabi/src/BUILD.gn b/llvm/utils/gn/secondary/libcxxabi/src/BUILD.gn index 3e6063a211753..7cccf866db2ba 100644 --- a/llvm/utils/gn/secondary/libcxxabi/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxxabi/src/BUILD.gn @@ -59,10 +59,7 @@ if (target_os == "linux" || target_os == "fuchsia") { } config("cxxabi_config") { - include_dirs = [ - "//libcxxabi/include", - "//libcxx/include", - ] + include_dirs = [ "//libcxxabi/include" ] cflags_cc = [ "-nostdinc++" ] defines = [ "_LIBCXXABI_BUILDING_LIBRARY" ] if (target_os == "win") { @@ -86,6 +83,7 @@ if (libcxxabi_enable_shared) { public = cxxabi_headers deps = [ "//compiler-rt/lib/builtins", + "//libcxx/include", "//libunwind/src:unwind_shared", ] configs += [ ":cxxabi_config" ] @@ -116,6 +114,7 @@ if (libcxxabi_enable_static) { } deps = [ "//compiler-rt/lib/builtins", + "//libcxx/include", "//libunwind/src:unwind_static", ] configs += [ ":cxxabi_config" ] From b5aa67446e01bd277727b05710a42e69ac41e74b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 21 Oct 2020 12:53:24 -0400 Subject: [PATCH 064/179] [libc++] Fix the installation of libc++ headers since the __config_site change --- libcxx/include/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index a8d6f74ea38f0..82af0e77e1ef4 100644 --- 
a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -223,14 +223,14 @@ if (LIBCXX_INSTALL_HEADERS) endforeach() # Install the generated __config_site. - install(FILES ${LIBCXX_BINARY_DIR}/__config_site + install(FILES ${LIBCXX_GENERATED_INCLUDE_DIR}/__config_site DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1 PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ COMPONENT cxx-headers) if (NOT CMAKE_CONFIGURATION_TYPES) add_custom_target(install-cxx-headers - DEPENDS cxx-headers cxx-generated-config + DEPENDS cxx-headers COMMAND "${CMAKE_COMMAND}" -DCMAKE_INSTALL_COMPONENT=cxx-headers -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") From 8b7dac81d378c339d3e55f6f51cd0c42803903ad Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 21 Oct 2020 12:57:50 -0400 Subject: [PATCH 065/179] [gn build] try to fix up deps of __config_site after 37c030f81a --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index f53304df645c9..2695a58dff54b 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -262,11 +262,9 @@ config("include_config") { } group("include") { - if (current_toolchain == default_toolchain) { - deps = [ - ":copy_headers", - ":write_config_site", - ] - } + deps = [ + ":copy_headers($default_toolchain)", + ":write_config_site($default_toolchain)", + ] public_configs = [ ":include_config" ] } From cf34dd0c4e84b69bb901f9cd4f3491852238ec44 Mon Sep 17 00:00:00 2001 From: Tyker Date: Thu, 1 Oct 2020 17:58:07 +0200 Subject: [PATCH 066/179] [clang] Improve Serialization/Imporing/Dumping of APValues Changes: - initializer expressions of constexpr variable are now wraped in a ConstantExpr. this is mainly used for testing purposes. the old caching system has not yet been removed. 
- Add all the missing Serialization and Importing for APValue. - Improve dumping of APValue when ASTContext isn't available. - Cleanup leftover from last patch. - Add Tests for Import and serialization. Differential Revision: https://reviews.llvm.org/D63640 --- clang/include/clang/AST/APValue.h | 28 +- clang/include/clang/AST/ASTContext.h | 3 - clang/include/clang/AST/ASTImporter.h | 8 + clang/lib/AST/APValue.cpp | 43 ++- clang/lib/AST/ASTContext.cpp | 3 - clang/lib/AST/ASTImporter.cpp | 183 +++++++++- clang/lib/AST/Expr.cpp | 3 +- clang/lib/Serialization/ASTReader.cpp | 152 ++++++-- clang/lib/Serialization/ASTWriter.cpp | 91 ++++- clang/test/ASTMerge/APValue/APValue.cpp | 462 ++++++++++++++++++++++++ 10 files changed, 912 insertions(+), 64 deletions(-) create mode 100644 clang/test/ASTMerge/APValue/APValue.cpp diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h index 0ee48f35a20a1..04892d43e4a0f 100644 --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -235,8 +235,10 @@ class APValue { struct UninitArray {}; struct UninitStruct {}; - friend class ASTReader; + friend class ASTRecordReader; friend class ASTWriter; + friend class ASTImporter; + friend class ASTNodeImporter; private: ValueKind Kind; @@ -569,11 +571,9 @@ class APValue { *(APFixedPoint *)(char *)Data.buffer = std::move(FX); } void setVector(const APValue *E, unsigned N) { - assert(isVector() && "Invalid accessor"); - ((Vec*)(char*)Data.buffer)->Elts = new APValue[N]; - ((Vec*)(char*)Data.buffer)->NumElts = N; + MutableArrayRef InternalElts = setVectorUninit(N); for (unsigned i = 0; i != N; ++i) - ((Vec*)(char*)Data.buffer)->Elts[i] = E[i]; + InternalElts[i] = E[i]; } void setComplexInt(APSInt R, APSInt I) { assert(R.getBitWidth() == I.getBitWidth() && @@ -656,6 +656,24 @@ class APValue { new ((void*)(char*)Data.buffer) AddrLabelDiffData(); Kind = AddrLabelDiff; } + +private: + /// The following functions are used as part of initialization, 
during + /// deserialization and importing. Reserve the space so that it can be + /// filled in by those steps. + MutableArrayRef setVectorUninit(unsigned N) { + assert(isVector() && "Invalid accessor"); + Vec *V = ((Vec *)(char *)Data.buffer); + V->Elts = new APValue[N]; + V->NumElts = N; + return {V->Elts, V->NumElts}; + } + MutableArrayRef + setLValueUninit(LValueBase B, const CharUnits &O, unsigned Size, + bool OnePastTheEnd, bool IsNullPtr); + MutableArrayRef + setMemberPointerUninit(const ValueDecl *Member, bool IsDerivedMember, + unsigned Size); }; } // end namespace clang. diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 60c4c1ce788e5..d8c0b624ef71e 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -289,9 +289,6 @@ class ASTContext : public RefCountedBase { /// Mapping from GUIDs to the corresponding MSGuidDecl. mutable llvm::FoldingSet MSGuidDecls; - /// Used to cleanups APValues stored in the AST. - mutable llvm::SmallVector APValueCleanups; - /// A cache mapping a string value to a StringLiteral object with the same /// value. /// diff --git a/clang/include/clang/AST/ASTImporter.h b/clang/include/clang/AST/ASTImporter.h index 205d7ec67754f..a6d822ba2ea6d 100644 --- a/clang/include/clang/AST/ASTImporter.h +++ b/clang/include/clang/AST/ASTImporter.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_AST_ASTIMPORTER_H #define LLVM_CLANG_AST_ASTIMPORTER_H +#include "clang/AST/APValue.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/ExprCXX.h" @@ -503,6 +504,13 @@ class TypeSourceInfo; /// "to" context, or the import error. llvm::Expected Import(const CXXBaseSpecifier *FromSpec); + /// Import the given APValue from the "from" context into + /// the "to" context. + /// + /// \return the equivalent APValue in the "to" context or the import + /// error. 
+ llvm::Expected Import(const APValue &FromValue); + /// Import the definition of the given declaration, including all of /// the declarations it contains. LLVM_NODISCARD llvm::Error ImportDefinition(Decl *From); diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 8d402ee8e3dca..919cd86ea9cfd 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -882,17 +882,26 @@ void APValue::setLValue(LValueBase B, const CharUnits &O, NoLValuePath, LVal.IsNullPtr = IsNullPtr; } -void APValue::setLValue(LValueBase B, const CharUnits &O, - ArrayRef Path, bool IsOnePastTheEnd, - bool IsNullPtr) { +MutableArrayRef +APValue::setLValueUninit(LValueBase B, const CharUnits &O, unsigned Size, + bool IsOnePastTheEnd, bool IsNullPtr) { assert(isLValue() && "Invalid accessor"); - LV &LVal = *((LV*)(char*)Data.buffer); + LV &LVal = *((LV *)(char *)Data.buffer); LVal.Base = B; LVal.IsOnePastTheEnd = IsOnePastTheEnd; LVal.Offset = O; - LVal.resizePath(Path.size()); - memcpy(LVal.getPath(), Path.data(), Path.size() * sizeof(LValuePathEntry)); LVal.IsNullPtr = IsNullPtr; + LVal.resizePath(Size); + return {LVal.getPath(), Size}; +} + +void APValue::setLValue(LValueBase B, const CharUnits &O, + ArrayRef Path, bool IsOnePastTheEnd, + bool IsNullPtr) { + MutableArrayRef InternalPath = + setLValueUninit(B, O, Path.size(), IsOnePastTheEnd, IsNullPtr); + memcpy(InternalPath.data(), Path.data(), + Path.size() * sizeof(LValuePathEntry)); } const ValueDecl *APValue::getMemberPointerDecl() const { @@ -929,15 +938,27 @@ void APValue::MakeArray(unsigned InitElts, unsigned Size) { Kind = Array; } -void APValue::MakeMemberPointer(const ValueDecl *Member, bool IsDerivedMember, - ArrayRef Path) { +MutableArrayRef +setLValueUninit(APValue::LValueBase B, const CharUnits &O, unsigned Size, + bool OnePastTheEnd, bool IsNullPtr); + +MutableArrayRef +APValue::setMemberPointerUninit(const ValueDecl *Member, bool IsDerivedMember, + unsigned Size) { assert(isAbsent() && "Bad state 
change"); - MemberPointerData *MPD = new ((void*)(char*)Data.buffer) MemberPointerData; + MemberPointerData *MPD = new ((void *)(char *)Data.buffer) MemberPointerData; Kind = MemberPointer; MPD->MemberAndIsDerivedMember.setPointer( Member ? cast(Member->getCanonicalDecl()) : nullptr); MPD->MemberAndIsDerivedMember.setInt(IsDerivedMember); - MPD->resizePath(Path.size()); + MPD->resizePath(Size); + return {MPD->getPath(), MPD->PathLength}; +} + +void APValue::MakeMemberPointer(const ValueDecl *Member, bool IsDerivedMember, + ArrayRef Path) { + MutableArrayRef InternalPath = + setMemberPointerUninit(Member, IsDerivedMember, Path.size()); for (unsigned I = 0; I != Path.size(); ++I) - MPD->getPath()[I] = Path[I]->getCanonicalDecl(); + InternalPath[I] = Path[I]->getCanonicalDecl(); } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index bddaaa6e19ecf..32bb3f991d959 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1005,9 +1005,6 @@ ASTContext::~ASTContext() { for (const auto &Value : ModuleInitializers) Value.second->~PerModuleInitializers(); - - for (APValue *Value : APValueCleanups) - Value->~APValue(); } void ASTContext::setTraversalScope(const std::vector &TopLevelDecls) { diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 5a70d0ee95906..23720bf75a656 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -397,6 +397,7 @@ namespace clang { Error ImportImplicitMethods(const CXXRecordDecl *From, CXXRecordDecl *To); Expected ImportCastPath(CastExpr *E); + Expected ImportAPValue(const APValue &FromValue); using Designator = DesignatedInitExpr::Designator; @@ -6692,18 +6693,11 @@ ExpectedStmt ASTNodeImporter::VisitAddrLabelExpr(AddrLabelExpr *E) { ExpectedStmt ASTNodeImporter::VisitConstantExpr(ConstantExpr *E) { Error Err = Error::success(); auto ToSubExpr = importChecked(Err, E->getSubExpr()); + auto ToResult = importChecked(Err, E->getAPValueResult()); if 
(Err) return std::move(Err); - // TODO : Handle APValue::ValueKind that require importing. - - APValue::ValueKind Kind = E->getResultAPValueKind(); - if (Kind == APValue::Int || Kind == APValue::Float || - Kind == APValue::FixedPoint || Kind == APValue::ComplexFloat || - Kind == APValue::ComplexInt) - return ConstantExpr::Create(Importer.getToContext(), ToSubExpr, - E->getAPValueResult()); - return ConstantExpr::Create(Importer.getToContext(), ToSubExpr); + return ConstantExpr::Create(Importer.getToContext(), ToSubExpr, ToResult); } ExpectedStmt ASTNodeImporter::VisitParenExpr(ParenExpr *E) { Error Err = Error::success(); @@ -8804,6 +8798,11 @@ ASTImporter::Import(const CXXBaseSpecifier *BaseSpec) { return Imported; } +llvm::Expected ASTImporter::Import(const APValue &FromValue) { + ASTNodeImporter Importer(*this); + return Importer.ImportAPValue(FromValue); +} + Error ASTImporter::ImportDefinition(Decl *From) { ExpectedDecl ToOrErr = Import(From); if (!ToOrErr) @@ -8934,6 +8933,172 @@ Expected ASTImporter::Import(Selector FromSel) { return ToContext.Selectors.getSelector(FromSel.getNumArgs(), Idents.data()); } +llvm::Expected +ASTNodeImporter::ImportAPValue(const APValue &FromValue) { + APValue Result; + llvm::Error Err = llvm::Error::success(); + auto ImportLoop = [&](const APValue *From, APValue *To, unsigned Size) { + for (unsigned Idx = 0; Idx < Size; Idx++) { + APValue Tmp = importChecked(Err, From[Idx]); + To[Idx] = Tmp; + } + }; + switch (FromValue.getKind()) { + case APValue::None: + case APValue::Indeterminate: + case APValue::Int: + case APValue::Float: + case APValue::FixedPoint: + case APValue::ComplexInt: + case APValue::ComplexFloat: + Result = FromValue; + break; + case APValue::Vector: { + Result.MakeVector(); + MutableArrayRef Elts = + Result.setVectorUninit(FromValue.getVectorLength()); + ImportLoop( + ((const APValue::Vec *)(const char *)FromValue.Data.buffer)->Elts, + Elts.data(), FromValue.getVectorLength()); + break; + } + case 
APValue::Array: + Result.MakeArray(FromValue.getArrayInitializedElts(), + FromValue.getArraySize()); + ImportLoop( + ((const APValue::Arr *)(const char *)FromValue.Data.buffer)->Elts, + ((const APValue::Arr *)(const char *)Result.Data.buffer)->Elts, + FromValue.getArrayInitializedElts()); + break; + case APValue::Struct: + Result.MakeStruct(FromValue.getStructNumBases(), + FromValue.getStructNumFields()); + ImportLoop( + ((const APValue::StructData *)(const char *)FromValue.Data.buffer) + ->Elts, + ((const APValue::StructData *)(const char *)Result.Data.buffer)->Elts, + FromValue.getStructNumBases() + FromValue.getStructNumFields()); + break; + case APValue::Union: { + Result.MakeUnion(); + const Decl *ImpFDecl = importChecked(Err, FromValue.getUnionField()); + APValue ImpValue = importChecked(Err, FromValue.getUnionValue()); + if (Err) + return std::move(Err); + Result.setUnion(cast(ImpFDecl), ImpValue); + break; + } + case APValue::AddrLabelDiff: { + Result.MakeAddrLabelDiff(); + const Expr *ImpLHS = importChecked(Err, FromValue.getAddrLabelDiffLHS()); + const Expr *ImpRHS = importChecked(Err, FromValue.getAddrLabelDiffRHS()); + if (Err) + return std::move(Err); + Result.setAddrLabelDiff(cast(ImpLHS), + cast(ImpRHS)); + break; + } + case APValue::MemberPointer: { + const Decl *ImpMemPtrDecl = + importChecked(Err, FromValue.getMemberPointerDecl()); + if (Err) + return std::move(Err); + MutableArrayRef ToPath = + Result.setMemberPointerUninit( + cast(ImpMemPtrDecl), + FromValue.isMemberPointerToDerivedMember(), + FromValue.getMemberPointerPath().size()); + llvm::ArrayRef FromPath = + Result.getMemberPointerPath(); + for (unsigned Idx = 0; Idx < FromValue.getMemberPointerPath().size(); + Idx++) { + const Decl *ImpDecl = importChecked(Err, FromPath[Idx]); + if (Err) + return std::move(Err); + ToPath[Idx] = cast(ImpDecl->getCanonicalDecl()); + } + break; + } + case APValue::LValue: + APValue::LValueBase Base; + QualType FromElemTy; + if (FromValue.getLValueBase()) { + 
assert(!FromValue.getLValueBase().is() && + "in C++20 dynamic allocation are transient so they shouldn't " + "appear in the AST"); + if (!FromValue.getLValueBase().is()) { + if (const auto *E = + FromValue.getLValueBase().dyn_cast()) { + FromElemTy = E->getType(); + const Expr *ImpExpr = importChecked(Err, E); + if (Err) + return std::move(Err); + Base = APValue::LValueBase(ImpExpr, + FromValue.getLValueBase().getCallIndex(), + FromValue.getLValueBase().getVersion()); + } else { + FromElemTy = + FromValue.getLValueBase().get()->getType(); + const Decl *ImpDecl = importChecked( + Err, FromValue.getLValueBase().get()); + if (Err) + return std::move(Err); + Base = APValue::LValueBase(cast(ImpDecl), + FromValue.getLValueBase().getCallIndex(), + FromValue.getLValueBase().getVersion()); + } + } else { + FromElemTy = FromValue.getLValueBase().getTypeInfoType(); + QualType ImpTypeInfo = importChecked( + Err, + QualType(FromValue.getLValueBase().get().getType(), + 0)); + QualType ImpType = + importChecked(Err, FromValue.getLValueBase().getTypeInfoType()); + if (Err) + return std::move(Err); + Base = APValue::LValueBase::getTypeInfo( + TypeInfoLValue(ImpTypeInfo.getTypePtr()), ImpType); + } + } + CharUnits Offset = FromValue.getLValueOffset(); + unsigned PathLength = FromValue.getLValuePath().size(); + Result.MakeLValue(); + if (FromValue.hasLValuePath()) { + MutableArrayRef ToPath = Result.setLValueUninit( + Base, Offset, PathLength, FromValue.isLValueOnePastTheEnd(), + FromValue.isNullPointer()); + llvm::ArrayRef FromPath = + FromValue.getLValuePath(); + for (unsigned LoopIdx = 0; LoopIdx < PathLength; LoopIdx++) { + if (FromElemTy->isRecordType()) { + const Decl *FromDecl = + FromPath[LoopIdx].getAsBaseOrMember().getPointer(); + const Decl *ImpDecl = importChecked(Err, FromDecl); + if (Err) + return std::move(Err); + if (auto *RD = dyn_cast(FromDecl)) + FromElemTy = Importer.FromContext.getRecordType(RD); + else + FromElemTy = cast(FromDecl)->getType(); + ToPath[LoopIdx] 
= APValue::LValuePathEntry(APValue::BaseOrMemberType( + ImpDecl, FromPath[LoopIdx].getAsBaseOrMember().getInt())); + } else { + FromElemTy = + Importer.FromContext.getAsArrayType(FromElemTy)->getElementType(); + ToPath[LoopIdx] = APValue::LValuePathEntry::ArrayIndex( + FromPath[LoopIdx].getAsArrayIndex()); + } + } + } else + Result.setLValue(Base, Offset, APValue::NoLValuePath{}, + FromValue.isNullPointer()); + } + if (Err) + return std::move(Err); + return Result; +} + Expected ASTImporter::HandleNameConflict(DeclarationName Name, DeclContext *DC, unsigned IDNS, diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 919d3220875c4..c6b2c47a48fb2 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -360,7 +360,6 @@ llvm::APSInt ConstantExpr::getResultAsAPSInt() const { } APValue ConstantExpr::getAPValueResult() const { - assert(hasAPValueResult()); switch (ConstantExprBits.ResultKind) { case ConstantExpr::RSK_APValue: @@ -370,6 +369,8 @@ APValue ConstantExpr::getAPValueResult() const { llvm::APSInt(llvm::APInt(ConstantExprBits.BitWidth, Int64Result()), ConstantExprBits.IsUnsigned)); case ConstantExpr::RSK_None: + if (ConstantExprBits.APValueKind == APValue::Indeterminate) + return APValue::IndeterminateValue(); return APValue(); } llvm_unreachable("invalid ResultKind"); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b2780db85166d..79fabfec54b4c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8963,48 +8963,146 @@ ReadFixedPointSemantics(const SmallVectorImpl &Record, HasUnsignedPadding); } -static const llvm::fltSemantics & -readAPFloatSemantics(ASTRecordReader &reader) { - return llvm::APFloatBase::EnumToSemantics( - static_cast(reader.readInt())); -} - APValue ASTRecordReader::readAPValue() { - unsigned Kind = readInt(); - switch ((APValue::ValueKind) Kind) { + auto Kind = static_cast(asImpl().readUInt32()); + switch (Kind) { case 
APValue::None: return APValue(); case APValue::Indeterminate: return APValue::IndeterminateValue(); case APValue::Int: - return APValue(readAPSInt()); + return APValue(asImpl().readAPSInt()); case APValue::Float: { - const llvm::fltSemantics &FloatSema = readAPFloatSemantics(*this); - return APValue(readAPFloat(FloatSema)); + const llvm::fltSemantics &FloatSema = llvm::APFloatBase::EnumToSemantics( + static_cast(asImpl().readUInt32())); + return APValue(asImpl().readAPFloat(FloatSema)); } case APValue::FixedPoint: { llvm::FixedPointSemantics FPSema = ReadFixedPointSemantics(Record, Idx); return APValue(llvm::APFixedPoint(readAPInt(), FPSema)); } case APValue::ComplexInt: { - llvm::APSInt First = readAPSInt(); - return APValue(std::move(First), readAPSInt()); + llvm::APSInt First = asImpl().readAPSInt(); + return APValue(std::move(First), asImpl().readAPSInt()); } case APValue::ComplexFloat: { - const llvm::fltSemantics &FloatSema1 = readAPFloatSemantics(*this); - llvm::APFloat First = readAPFloat(FloatSema1); - const llvm::fltSemantics &FloatSema2 = readAPFloatSemantics(*this); - return APValue(std::move(First), readAPFloat(FloatSema2)); - } - case APValue::LValue: - case APValue::Vector: - case APValue::Array: - case APValue::Struct: - case APValue::Union: - case APValue::MemberPointer: - case APValue::AddrLabelDiff: - // TODO : Handle all these APValue::ValueKind. 
- return APValue(); + const llvm::fltSemantics &FloatSema = llvm::APFloatBase::EnumToSemantics( + static_cast(asImpl().readUInt32())); + llvm::APFloat First = readAPFloat(FloatSema); + return APValue(std::move(First), asImpl().readAPFloat(FloatSema)); + } + case APValue::Vector: { + APValue Result; + Result.MakeVector(); + unsigned Length = asImpl().readUInt32(); + (void)Result.setVectorUninit(Length); + for (unsigned LoopIdx = 0; LoopIdx < Length; LoopIdx++) + Result.getVectorElt(LoopIdx) = asImpl().readAPValue(); + return Result; + } + case APValue::Array: { + APValue Result; + unsigned InitLength = asImpl().readUInt32(); + unsigned TotalLength = asImpl().readUInt32(); + Result.MakeArray(InitLength, TotalLength); + for (unsigned LoopIdx = 0; LoopIdx < InitLength; LoopIdx++) + Result.getArrayInitializedElt(LoopIdx) = asImpl().readAPValue(); + return Result; + } + case APValue::Struct: { + APValue Result; + unsigned BasesLength = asImpl().readUInt32(); + unsigned FieldsLength = asImpl().readUInt32(); + Result.MakeStruct(BasesLength, FieldsLength); + for (unsigned LoopIdx = 0; LoopIdx < BasesLength; LoopIdx++) + Result.getStructBase(LoopIdx) = asImpl().readAPValue(); + for (unsigned LoopIdx = 0; LoopIdx < FieldsLength; LoopIdx++) + Result.getStructField(LoopIdx) = asImpl().readAPValue(); + return Result; + } + case APValue::Union: { + auto *FDecl = asImpl().readDeclAs(); + APValue Value = asImpl().readAPValue(); + return APValue(FDecl, std::move(Value)); + } + case APValue::AddrLabelDiff: { + auto *LHS = cast(asImpl().readExpr()); + auto *RHS = cast(asImpl().readExpr()); + return APValue(LHS, RHS); + } + case APValue::MemberPointer: { + APValue Result; + bool IsDerived = asImpl().readUInt32(); + auto *Member = asImpl().readDeclAs(); + unsigned PathSize = asImpl().readUInt32(); + const CXXRecordDecl **PathArray = + Result.setMemberPointerUninit(Member, IsDerived, PathSize).data(); + for (unsigned LoopIdx = 0; LoopIdx < PathSize; LoopIdx++) + PathArray[LoopIdx] = + 
asImpl().readDeclAs()->getCanonicalDecl(); + return Result; + } + case APValue::LValue: { + uint64_t Bits = asImpl().readUInt32(); + bool HasLValuePath = Bits & 0x1; + bool IsLValueOnePastTheEnd = Bits & 0x2; + bool IsExpr = Bits & 0x4; + bool IsTypeInfo = Bits & 0x8; + bool IsNullPtr = Bits & 0x10; + bool HasBase = Bits & 0x20; + APValue::LValueBase Base; + QualType ElemTy; + assert((!IsExpr || !IsTypeInfo) && "LValueBase cannot be both"); + if (HasBase) { + if (!IsTypeInfo) { + unsigned CallIndex = asImpl().readUInt32(); + unsigned Version = asImpl().readUInt32(); + if (IsExpr) { + Base = APValue::LValueBase(asImpl().readExpr(), CallIndex, Version); + ElemTy = Base.get()->getType(); + } else { + Base = APValue::LValueBase(asImpl().readDeclAs(), + CallIndex, Version); + ElemTy = Base.get()->getType(); + } + } else { + QualType TypeInfo = asImpl().readType(); + QualType Type = asImpl().readType(); + Base = APValue::LValueBase::getTypeInfo( + TypeInfoLValue(TypeInfo.getTypePtr()), Type); + Base.getTypeInfoType(); + } + } + CharUnits Offset = CharUnits::fromQuantity(asImpl().readUInt32()); + unsigned PathLength = asImpl().readUInt32(); + APValue Result; + Result.MakeLValue(); + if (HasLValuePath) { + APValue::LValuePathEntry *Path = + Result + .setLValueUninit(Base, Offset, PathLength, IsLValueOnePastTheEnd, + IsNullPtr) + .data(); + for (unsigned LoopIdx = 0; LoopIdx < PathLength; LoopIdx++) { + if (ElemTy->getAs()) { + unsigned Int = asImpl().readUInt32(); + Decl *D = asImpl().readDeclAs(); + if (auto *RD = dyn_cast(D)) + ElemTy = getASTContext().getRecordType(RD); + else + ElemTy = cast(D)->getType(); + Path[LoopIdx] = + APValue::LValuePathEntry(APValue::BaseOrMemberType(D, Int)); + } else { + ElemTy = getASTContext().getAsArrayType(ElemTy)->getElementType(); + Path[LoopIdx] = + APValue::LValuePathEntry::ArrayIndex(asImpl().readUInt32()); + } + } + } else + Result.setLValue(Base, Offset, APValue::NoLValuePath{}, IsNullPtr); + return Result; + } } 
llvm_unreachable("Invalid APValue::ValueKind"); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ed00a3bc62812..bbc8248ae22e8 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5155,22 +5155,103 @@ void ASTRecordWriter::AddAPValue(const APValue &Value) { return; } case APValue::ComplexFloat: { + assert(llvm::APFloatBase::SemanticsToEnum( + Value.getComplexFloatImag().getSemantics()) == + llvm::APFloatBase::SemanticsToEnum( + Value.getComplexFloatReal().getSemantics())); push_back(static_cast(llvm::APFloatBase::SemanticsToEnum( Value.getComplexFloatReal().getSemantics()))); AddAPFloat(Value.getComplexFloatReal()); - push_back(static_cast(llvm::APFloatBase::SemanticsToEnum( - Value.getComplexFloatImag().getSemantics()))); AddAPFloat(Value.getComplexFloatImag()); return; } - case APValue::LValue: case APValue::Vector: + push_back(Value.getVectorLength()); + for (unsigned Idx = 0; Idx < Value.getVectorLength(); Idx++) + AddAPValue(Value.getVectorElt(Idx)); + return; case APValue::Array: + push_back(Value.getArrayInitializedElts()); + push_back(Value.getArraySize()); + for (unsigned Idx = 0; Idx < Value.getArrayInitializedElts(); Idx++) + AddAPValue(Value.getArrayInitializedElt(Idx)); + return; case APValue::Struct: + push_back(Value.getStructNumBases()); + push_back(Value.getStructNumFields()); + for (unsigned Idx = 0; Idx < Value.getStructNumBases(); Idx++) + AddAPValue(Value.getStructBase(Idx)); + for (unsigned Idx = 0; Idx < Value.getStructNumFields(); Idx++) + AddAPValue(Value.getStructField(Idx)); + return; case APValue::Union: - case APValue::MemberPointer: + AddDeclRef(Value.getUnionField()); + AddAPValue(Value.getUnionValue()); + return; case APValue::AddrLabelDiff: - // TODO : Handle all these APValue::ValueKind. 
+ AddStmt(const_cast(Value.getAddrLabelDiffLHS())); + AddStmt(const_cast(Value.getAddrLabelDiffRHS())); + return; + case APValue::MemberPointer: { + push_back(Value.isMemberPointerToDerivedMember()); + AddDeclRef(Value.getMemberPointerDecl()); + ArrayRef RecordPath = Value.getMemberPointerPath(); + push_back(RecordPath.size()); + for (auto Elem : RecordPath) + AddDeclRef(Elem); + return; + } + case APValue::LValue: { + push_back(Value.hasLValuePath() | Value.isLValueOnePastTheEnd() << 1 | + Value.getLValueBase().is() << 2 | + Value.getLValueBase().is() << 3 | + Value.isNullPointer() << 4 | + static_cast(Value.getLValueBase()) << 5); + QualType ElemTy; + if (Value.getLValueBase()) { + assert(!Value.getLValueBase().is() && + "in C++20 dynamic allocation are transient so they shouldn't " + "appear in the AST"); + if (!Value.getLValueBase().is()) { + push_back(Value.getLValueBase().getCallIndex()); + push_back(Value.getLValueBase().getVersion()); + if (const auto *E = Value.getLValueBase().dyn_cast()) { + AddStmt(const_cast(E)); + ElemTy = E->getType(); + } else { + AddDeclRef(Value.getLValueBase().get()); + ElemTy = Value.getLValueBase().get()->getType(); + } + } else { + AddTypeRef( + QualType(Value.getLValueBase().get().getType(), 0)); + AddTypeRef(Value.getLValueBase().getTypeInfoType()); + ElemTy = Value.getLValueBase().getTypeInfoType(); + } + } + push_back(Value.getLValueOffset().getQuantity()); + push_back(Value.getLValuePath().size()); + if (Value.hasLValuePath()) { + ArrayRef Path = Value.getLValuePath(); + for (auto Elem : Path) { + if (ElemTy->getAs()) { + push_back(Elem.getAsBaseOrMember().getInt()); + const Decl *BaseOrMember = Elem.getAsBaseOrMember().getPointer(); + if (const auto *RD = dyn_cast(BaseOrMember)) { + AddDeclRef(RD); + ElemTy = Writer->Context->getRecordType(RD); + } else { + const auto *VD = cast(BaseOrMember); + AddDeclRef(VD); + ElemTy = VD->getType(); + } + } else { + push_back(Elem.getAsArrayIndex()); + ElemTy = 
Writer->Context->getAsArrayType(ElemTy)->getElementType(); + } + } + } + } return; } llvm_unreachable("Invalid APValue::ValueKind"); diff --git a/clang/test/ASTMerge/APValue/APValue.cpp b/clang/test/ASTMerge/APValue/APValue.cpp new file mode 100644 index 0000000000000..c8695fb6074bb --- /dev/null +++ b/clang/test/ASTMerge/APValue/APValue.cpp @@ -0,0 +1,462 @@ +// RUN: %clang_cc1 -std=gnu++2a -emit-pch %s -o %t.pch +// RUN: %clang_cc1 -std=gnu++2a %s -DEMIT -ast-merge %t.pch -ast-dump-all | FileCheck %s + +// XFAIL: * + +#ifndef EMIT +#define EMIT + +namespace Integer { + +consteval int fint() { + return 6789; +} + +int Unique_Int = fint(); +//CHECK: VarDecl {{.*}} Unique_Int +//CHECK-NEXT: ConstantExpr {{.*}} 'int' +//CHECK-NEXT: value: Int 6789 + +consteval __uint128_t fint128() { + return ((__uint128_t)0x75f17d6b3588f843 << 64) | 0xb13dea7c9c324e51; +} + +constexpr __uint128_t Unique_Int128 = fint128(); +//CHECK: VarDecl {{.*}} Unique_Int128 +//CHECK-NEXT: value: Int 156773562844924187900898496343692168785 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Int 156773562844924187900898496343692168785 + +} // namespace Integer + +namespace FloatingPoint { + +consteval double fdouble() { + return double(567890.67890); +} + +double Unique_Double = fdouble(); +//CHECK: VarDecl {{.*}} Unique_Double +//CHECK-NEXT: ConstantExpr {{.*}} +//CHECK-NEXT: value: Float 5.678907e+05 + +} // namespace FloatingPoint + +// FIXME: Add test for FixedPoint, ComplexInt, ComplexFloat, AddrLabelDiff. 
+ +namespace Struct { + +struct B { + int i; + double d; +}; + +consteval B fB() { + return B{1, 0.7}; +} + +constexpr B Basic_Struct = fB(); +//CHECK: VarDecl {{.*}} Basic_Struct +//CHECK-NEXT: value: Struct +//CHECK-NEXT: fields: Int 1, Float 7.000000e-01 +//CHECK-NEXT: ImplicitCastExpr +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Struct +//CHECK-NEXT: fields: Int 1, Float 7.000000e-01 + +struct C { + int i = 9; +}; + +struct A : B { + constexpr A(B b, int I, double D, C _c) : B(b), i(I), d(D), c(_c) {} + int i; + double d; + C c; +}; + +consteval A fA() { + return A(Basic_Struct, 1, 79.789, {}); +} + +A Advanced_Struct = fA(); +//CHECK: VarDecl {{.*}} Advanced_Struct +//CHECK-NEXT: ConstantExpr {{.*}} +//CHECK-NEXT: value: Struct +//CHECK-NEXT: base: Struct +//CHECK-NEXT: fields: Int 1, Float 7.000000e-01 +//CHECK-NEXT: fields: Int 1, Float 7.978900e+01 +//CHECK-NEXT: field: Struct +//CHECK-NEXT: field: Int 9 + +} // namespace Struct + +namespace Vector { + +using v4si = int __attribute__((__vector_size__(16))); + +consteval v4si fv4si() { + return (v4si){8, 2, 3}; +} + +v4si Vector_Int = fv4si(); +//CHECK: VarDecl {{.*}} Vector_Int +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Vector length=4 +//CHECK-NEXT: elements: Int 8, Int 2, Int 3, Int 0 + +} // namespace Vector + +namespace Array { + +struct B { + int arr[6]; +}; + +consteval B fint() { + return B{1, 2, 3, 4, 5, 6}; +} + +B Array_Int = fint(); +//CHECK: VarDecl {{.*}} Array_Int +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Struct +//CHECK-NEXT: field: Array size=6 +//CHECK-NEXT: elements: Int 1, Int 2, Int 3, Int 4 +//CHECK-NEXT: elements: Int 5, Int 6 + +struct A { + int i = 789; + double d = 67890.09876; +}; + +struct C { + A arr[3]; +}; + +consteval C fA() { + return {{A{}, A{-45678, 9.8}, A{9}}}; +} + +C Array2_Struct = fA(); +//CHECK: VarDecl {{.*}} Array2_Struct +//CHECK-NEXT: ConstantExpr {{.*}} + +using v4si = int __attribute__((__vector_size__(16))); + +struct D { + v4si arr[2]; +}; 
+ +consteval D fv4si() { + return {{{1, 2, 3, 4}, {4, 5, 6, 7}}}; +} + +D Array_Vector = fv4si(); +//CHECK: VarDecl {{.*}} Array_Vector +//CHECK-NEXT: ConstantExpr {{.*}} +//CHECK-NEXT: value: Struct +//CHECK-NEXT: field: Array size=2 +//CHECK-NEXT: element: Vector length=4 +//CHECK-NEXT: elements: Int 1, Int 2, Int 3, Int 4 +//CHECK-NEXT: element: Vector length=4 +//CHECK-NEXT: elements: Int 4, Int 5, Int 6, Int 7 + +} // namespace Array + +namespace Union { + +struct A { + int i = 6789; + float f = 987.9876; +}; + +union U { + int i; + A a{567890, 9876.5678f}; +}; + +consteval U fU1() { + return U{0}; +} + +U Unique_Union1 = fU1(); +//CHECK: VarDecl {{.*}} Unique_Union +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Union .i Int 0 + +consteval U fU() { + return U{}; +} + +U Unique_Union2 = fU(); +//CHECK: VarDecl {{.*}} Unique_Union +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Union .a +//CHECK-NEXT: Struct +//CHECK-NEXT: fields: Int 567890, Float 9.876567e+03 + +} // namespace Union + +namespace MemberPointer { + +struct A { + struct B { + struct C { + struct D { + struct E { + struct F { + struct G { + int i; + }; + }; + }; + }; + }; + }; +}; + +consteval auto fmem_ptr() -> decltype(&A::B::C::D::E::F::G::i) { + return &A::B::C::D::E::F::G::i; +} + +auto MemberPointer1 = fmem_ptr(); +//CHECK: VarDecl {{.*}} MemberPointer1 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: MemberPointer &G::i + +struct A1 { + struct B1 { + int f() const { + return 0; + } + }; +}; + +consteval auto fmem_ptr2() { + return &A1::B1::f; +} + +auto MemberPointer2 = fmem_ptr2(); +//CHECK: VarDecl {{.*}} MemberPointer2 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: MemberPointer &B1::f + +} // namespace MemberPointer + +namespace std { +struct type_info; +}; + +namespace LValue { + +constexpr int g = 0; + +consteval const int &fg_ref() { + return g; +} + +const int &g_ref = fg_ref(); +//CHECK: VarDecl {{.*}} g_ref +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &g + 
+consteval const int *fint_ptr() { + return &g; +} + +const int *g_ptr = fint_ptr(); +//CHECK: VarDecl {{.*}} g_ptr +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &g + +consteval const int *fnull_ptr() { + return nullptr; +} + +const int *ptr2 = fnull_ptr(); +//CHECK: VarDecl {{.*}} ptr2 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue nullptr + +int fconst(); + +consteval auto ffunc_ptr() { + return &fconst; +} + +int (*func_ptr)() = ffunc_ptr(); +//CHECK: VarDecl {{.*}} func_ptr +//CHECK-NEXT: ConstantExpr {{.*}} +//CHECK-NEXT: value: LValue &fconst + +struct A { + int Arr[6] = {0, 1, 3, 4, 5, 9}; + int i = 0; +}; + +struct D { + A arr[6] = {}; +}; + +consteval D fA() { + return {}; +} + +constexpr D Arr = fA(); +// CHECK: VarDecl {{.*}} Arr +// CHECK-NEXT: value: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: 
ImplicitCastExpr +// CHECK-NEXT: ConstantExpr +// CHECK-NEXT: value: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 +// CHECK-NEXT: element: Struct +// CHECK-NEXT: field: Array size=6 +// CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +// CHECK-NEXT: elements: Int 5, Int 9 +// CHECK-NEXT: field: Int 0 + +consteval const int &fconstintref() { + return Arr.arr[0].i; +} + +const int &ArrayStructRef1 = fconstintref(); +//CHECK: VarDecl {{.*}} ArrayStructRef1 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &Arr.arr[0].i + +consteval const int &fconstintref2() { + return Arr.arr[1].Arr[5]; +} + +const int &ArrayStructRef2 = fconstintref2(); +//CHECK: VarDecl {{.*}} ArrayStructRef2 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &Arr.arr[1].Arr[5] + +consteval const int *fconststar() { + return &ArrayStructRef2; +} + +const int *ArrayStructRef3 = fconststar(); +//CHECK: VarDecl {{.*}} ArrayStructRef3 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &Arr.arr[1].Arr[5] + +struct B : A { +}; + +struct C { + B b; +}; + 
+consteval C fC() { + return {}; +} + +C c = fC(); +//CHECK: VarDecl {{.*}} c +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: Struct +//CHECK-NEXT: field: Struct +//CHECK-NEXT: base: Struct +//CHECK-NEXT: field: Array size=6 +//CHECK-NEXT: elements: Int 0, Int 1, Int 3, Int 4 +//CHECK-NEXT: elements: Int 5, Int 9 +//CHECK-NEXT: field: Int 0 + +consteval const int &f2constintref() { + return c.b.i; +} + +const int &StructPathRef = f2constintref(); +//CHECK: VarDecl {{.*}} StructPathRef +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &c.b.A::i + +consteval const std::type_info *ftype_info() { + return &typeid(c); +} + +const std::type_info *T1 = ftype_info(); +//CHECK: VarDecl {{.*}} T1 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT:value: LValue &typeid(LValue::C) + +consteval const std::type_info *ftype_info2() { + return &typeid(Arr.arr[1].Arr[2]); +} + +const std::type_info *T2 = ftype_info2(); +//CHECK: VarDecl {{.*}} T2 +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &typeid(int) + +consteval const char *fstring() { + return "test"; +} + +const char *cptr = fstring(); +//CHECK: VarDecl {{.*}} cptr +//CHECK-NEXT: ConstantExpr +//CHECK-NEXT: value: LValue &"test"[0] + +} // namespace LValue + +#endif From f4bd0f01312dbe6d441197cd484d6497da0e1598 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 21 Oct 2020 18:00:23 +0100 Subject: [PATCH 067/179] [X86] Regenerate old vector-variable-insertion test --- .../X86/2009-06-05-VariableIndexInsert.ll | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll index 71a560a63ec52..535450a52ff60 100644 --- a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll +++ b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll @@ -1,7 +1,32 @@ -; RUN: llc < %s -; REQUIRES: default_triple +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; 
RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s -check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s -check-prefix=X64 define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone { +; X86-LABEL: _mm_insert_epi16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $32, %esp +; X86-NEXT: movzwl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movaps %xmm0, (%esp) +; X86-NEXT: movw %ax, (%esp,%ecx,2) +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: _mm_insert_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andl $7, %esi +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movw %di, -24(%rsp,%rsi,2) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %conv = bitcast <2 x i64> %a to <8 x i16> ; <<8 x i16>> [#uses=1] %conv2 = trunc i32 %b to i16 ; [#uses=1] From cdc90ec7876d3d6b343aa2917e6e3d5436c0a99e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 21 Oct 2020 18:10:33 +0100 Subject: [PATCH 068/179] [X86] Improve variable vector insertion test coverage. 
Add tests for insertion into non-undef vectors Add SSE2/AVX512F/AVX512BW coverage --- .../CodeGen/X86/insertelement-var-index.ll | 1008 +++++++++++++++-- 1 file changed, 903 insertions(+), 105 deletions(-) diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 75ed52588d505..9acc259dc3254 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -1,7 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512BW define <16 x i8> @undef_index(i8 %x) nounwind { ; ALL-LABEL: undef_index: @@ -19,124 +22,178 @@ define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind { ret <16 x i8> %ins } -define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i8_v16i8: -; SSE: # %bb.0: -; SSE-NEXT: movd %edi, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pshufb %xmm1, %xmm0 -; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i8_v16i8: +; Insertion into undef vectors +; + +define <16 x i8> @arg_i8_v16i8_undef(i8 %x, i32 %y) nounwind { +; SSE2-LABEL: arg_i8_v16i8_undef: +; SSE2: # %bb.0: +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE2-NEXT: retq +; +; SSE41-LABEL: arg_i8_v16i8_undef: +; SSE41: # %bb.0: +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pshufb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: arg_i8_v16i8_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i8_v16i8: +; AVX2-LABEL: arg_i8_v16i8_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 ; AVX2-NEXT: retq +; +; AVX512F-LABEL: arg_i8_v16i8_undef: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovd %edi, %xmm0 +; AVX512F-NEXT: vpbroadcastb %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: arg_i8_v16i8_undef: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastb %edi, %xmm0 +; AVX512BW-NEXT: retq %ins = insertelement <16 x i8> undef, i8 %x, i32 %y ret <16 x i8> %ins } -define <8 x i16> @arg_i16_v8i16(i16 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i16_v8i16: +define <8 x i16> @arg_i16_v8i16_undef(i16 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i16_v8i16_undef: ; SSE: # %bb.0: ; SSE-NEXT: movd %edi, %xmm0 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i16_v8i16: +; AVX1-LABEL: arg_i16_v8i16_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i16_v8i16: +; AVX2-LABEL: arg_i16_v8i16_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: retq +; +; AVX512F-LABEL: arg_i16_v8i16_undef: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovd %edi, %xmm0 +; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: arg_i16_v8i16_undef: +; AVX512BW: # %bb.0: +; 
AVX512BW-NEXT: vpbroadcastw %edi, %xmm0 +; AVX512BW-NEXT: retq %ins = insertelement <8 x i16> undef, i16 %x, i32 %y ret <8 x i16> %ins } -define <4 x i32> @arg_i32_v4i32(i32 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i32_v4i32: +define <4 x i32> @arg_i32_v4i32_undef(i32 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i32_v4i32_undef: ; SSE: # %bb.0: ; SSE-NEXT: movd %edi, %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i32_v4i32: +; AVX1-LABEL: arg_i32_v4i32_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i32_v4i32: +; AVX2-LABEL: arg_i32_v4i32_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_i32_v4i32_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastd %edi, %xmm0 +; AVX512-NEXT: retq %ins = insertelement <4 x i32> undef, i32 %x, i32 %y ret <4 x i32> %ins } -define <2 x i64> @arg_i64_v2i64(i64 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i64_v2i64: +define <2 x i64> @arg_i64_v2i64_undef(i64 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i64_v2i64_undef: ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i64_v2i64: +; AVX1-LABEL: arg_i64_v2i64_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovq %rdi, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i64_v2i64: +; AVX2-LABEL: arg_i64_v2i64_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovq %rdi, %xmm0 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_i64_v2i64_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 +; AVX512-NEXT: retq %ins = insertelement <2 x i64> undef, i64 %x, i32 %y ret <2 x i64> %ins } -define <4 x float> @arg_f32_v4f32(float %x, i32 %y) nounwind { -; SSE-LABEL: arg_f32_v4f32: +define <4 x float> @arg_f32_v4f32_undef(float %x, i32 
%y) nounwind { +; SSE-LABEL: arg_f32_v4f32_undef: ; SSE: # %bb.0: ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_f32_v4f32: +; AVX1-LABEL: arg_f32_v4f32_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_f32_v4f32: +; AVX2-LABEL: arg_f32_v4f32_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastss %xmm0, %xmm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_f32_v4f32_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 +; AVX512-NEXT: retq %ins = insertelement <4 x float> undef, float %x, i32 %y ret <4 x float> %ins } -define <2 x double> @arg_f64_v2f64(double %x, i32 %y) nounwind { -; SSE-LABEL: arg_f64_v2f64: -; SSE: # %bb.0: -; SSE-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] -; SSE-NEXT: retq +define <2 x double> @arg_f64_v2f64_undef(double %x, i32 %y) nounwind { +; SSE2-LABEL: arg_f64_v2f64_undef: +; SSE2: # %bb.0: +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] +; SSE2-NEXT: retq ; -; AVX-LABEL: arg_f64_v2f64: +; SSE41-LABEL: arg_f64_v2f64_undef: +; SSE41: # %bb.0: +; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] +; SSE41-NEXT: retq +; +; AVX-LABEL: arg_f64_v2f64_undef: ; AVX: # %bb.0: ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; AVX-NEXT: retq @@ -144,16 +201,25 @@ define <2 x double> @arg_f64_v2f64(double %x, i32 %y) nounwind { ret <2 x double> %ins } -define <16 x i8> @load_i8_v16i8(i8* %p, i32 %y) nounwind { -; SSE-LABEL: load_i8_v16i8: -; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax -; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pshufb %xmm1, %xmm0 -; SSE-NEXT: retq +define <16 x i8> @load_i8_v16i8_undef(i8* %p, i32 %y) nounwind { +; SSE2-LABEL: load_i8_v16i8_undef: +; SSE2: # %bb.0: +; SSE2-NEXT: movzbl (%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,0,0,0] +; SSE2-NEXT: retq ; -; AVX1-LABEL: load_i8_v16i8: +; SSE41-LABEL: load_i8_v16i8_undef: +; SSE41: # %bb.0: +; SSE41-NEXT: movzbl (%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pshufb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: load_i8_v16i8_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: movzbl (%rdi), %eax ; AVX1-NEXT: vmovd %eax, %xmm0 @@ -161,17 +227,22 @@ define <16 x i8> @load_i8_v16i8(i8* %p, i32 %y) nounwind { ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: load_i8_v16i8: +; AVX2-LABEL: load_i8_v16i8_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_i8_v16i8_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastb (%rdi), %xmm0 +; AVX512-NEXT: retq %x = load i8, i8* %p %ins = insertelement <16 x i8> undef, i8 %x, i32 %y ret <16 x i8> %ins } -define <8 x i16> @load_i16_v8i16(i16* %p, i32 %y) nounwind { -; SSE-LABEL: load_i16_v8i16: +define <8 x i16> @load_i16_v8i16_undef(i16* %p, i32 %y) nounwind { +; SSE-LABEL: load_i16_v8i16_undef: ; SSE: # %bb.0: ; SSE-NEXT: movzwl (%rdi), %eax ; SSE-NEXT: movd %eax, %xmm0 @@ -179,7 +250,7 @@ define <8 x i16> @load_i16_v8i16(i16* %p, i32 %y) nounwind { ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: load_i16_v8i16: +; AVX1-LABEL: load_i16_v8i16_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: movzwl (%rdi), %eax ; AVX1-NEXT: vmovd %eax, %xmm0 @@ -187,23 +258,28 @@ define <8 x i16> @load_i16_v8i16(i16* %p, i32 %y) nounwind { ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; -; AVX2-LABEL: load_i16_v8i16: +; AVX2-LABEL: load_i16_v8i16_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_i16_v8i16_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastw (%rdi), %xmm0 +; AVX512-NEXT: retq %x = load i16, i16* %p %ins = insertelement <8 x i16> undef, i16 %x, i32 %y ret <8 x i16> %ins } -define <4 
x i32> @load_i32_v4i32(i32* %p, i32 %y) nounwind { -; SSE-LABEL: load_i32_v4i32: +define <4 x i32> @load_i32_v4i32_undef(i32* %p, i32 %y) nounwind { +; SSE-LABEL: load_i32_v4i32_undef: ; SSE: # %bb.0: ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX-LABEL: load_i32_v4i32: +; AVX-LABEL: load_i32_v4i32_undef: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss (%rdi), %xmm0 ; AVX-NEXT: retq @@ -212,14 +288,14 @@ define <4 x i32> @load_i32_v4i32(i32* %p, i32 %y) nounwind { ret <4 x i32> %ins } -define <2 x i64> @load_i64_v2i64(i64* %p, i32 %y) nounwind { -; SSE-LABEL: load_i64_v2i64: +define <2 x i64> @load_i64_v2i64_undef(i64* %p, i32 %y) nounwind { +; SSE-LABEL: load_i64_v2i64_undef: ; SSE: # %bb.0: ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE-NEXT: retq ; -; AVX-LABEL: load_i64_v2i64: +; AVX-LABEL: load_i64_v2i64_undef: ; AVX: # %bb.0: ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] ; AVX-NEXT: retq @@ -228,14 +304,14 @@ define <2 x i64> @load_i64_v2i64(i64* %p, i32 %y) nounwind { ret <2 x i64> %ins } -define <4 x float> @load_f32_v4f32(float* %p, i32 %y) nounwind { -; SSE-LABEL: load_f32_v4f32: +define <4 x float> @load_f32_v4f32_undef(float* %p, i32 %y) nounwind { +; SSE-LABEL: load_f32_v4f32_undef: ; SSE: # %bb.0: ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX-LABEL: load_f32_v4f32: +; AVX-LABEL: load_f32_v4f32_undef: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss (%rdi), %xmm0 ; AVX-NEXT: retq @@ -244,13 +320,19 @@ define <4 x float> @load_f32_v4f32(float* %p, i32 %y) nounwind { ret <4 x float> %ins } -define <2 x double> @load_f64_v2f64(double* %p, i32 %y) nounwind { -; SSE-LABEL: load_f64_v2f64: -; SSE: # %bb.0: -; SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] -; SSE-NEXT: retq +define <2 x double> @load_f64_v2f64_undef(double* %p, i32 %y) nounwind { +; SSE2-LABEL: 
load_f64_v2f64_undef: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] +; SSE2-NEXT: retq ; -; AVX-LABEL: load_f64_v2f64: +; SSE41-LABEL: load_f64_v2f64_undef: +; SSE41: # %bb.0: +; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] +; SSE41-NEXT: retq +; +; AVX-LABEL: load_f64_v2f64_undef: ; AVX: # %bb.0: ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] ; AVX-NEXT: retq @@ -259,8 +341,8 @@ define <2 x double> @load_f64_v2f64(double* %p, i32 %y) nounwind { ret <2 x double> %ins } -define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i8_v32i8: +define <32 x i8> @arg_i8_v32i8_undef(i8 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i8_v32i8_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $31, %esi @@ -269,7 +351,7 @@ define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i8_v32i8: +; AVX1-LABEL: arg_i8_v32i8_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -277,17 +359,28 @@ define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind { ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i8_v32i8: +; AVX2-LABEL: arg_i8_v32i8_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512F-LABEL: arg_i8_v32i8_undef: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovd %edi, %xmm0 +; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: arg_i8_v32i8_undef: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastb %edi, %ymm0 +; AVX512BW-NEXT: retq %ins = insertelement <32 x i8> undef, i8 %x, i32 %y ret <32 x i8> %ins } -define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i16_v16i16: +define <16 x i16> @arg_i16_v16i16_undef(i16 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i16_v16i16_undef: ; SSE: # 
%bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $15, %esi @@ -296,7 +389,7 @@ define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i16_v16i16: +; AVX1-LABEL: arg_i16_v16i16_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] @@ -304,17 +397,28 @@ define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind { ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i16_v16i16: +; AVX2-LABEL: arg_i16_v16i16_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512F-LABEL: arg_i16_v16i16_undef: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovd %edi, %xmm0 +; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: arg_i16_v16i16_undef: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastw %edi, %ymm0 +; AVX512BW-NEXT: retq %ins = insertelement <16 x i16> undef, i16 %x, i32 %y ret <16 x i16> %ins } -define <8 x i32> @arg_i32_v8i32(i32 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i32_v8i32: +define <8 x i32> @arg_i32_v8i32_undef(i32 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i32_v8i32_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $7, %esi @@ -323,24 +427,29 @@ define <8 x i32> @arg_i32_v8i32(i32 %x, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i32_v8i32: +; AVX1-LABEL: arg_i32_v8i32_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i32_v8i32: +; AVX2-LABEL: arg_i32_v8i32_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_i32_v8i32_undef: +; AVX512: # %bb.0: +; 
AVX512-NEXT: vpbroadcastd %edi, %ymm0 +; AVX512-NEXT: retq %ins = insertelement <8 x i32> undef, i32 %x, i32 %y ret <8 x i32> %ins } -define <4 x i64> @arg_i64_v4i64(i64 %x, i32 %y) nounwind { -; SSE-LABEL: arg_i64_v4i64: +define <4 x i64> @arg_i64_v4i64_undef(i64 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i64_v4i64_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $3, %esi @@ -349,24 +458,29 @@ define <4 x i64> @arg_i64_v4i64(i64 %x, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_i64_v4i64: +; AVX1-LABEL: arg_i64_v4i64_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovq %rdi, %xmm0 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_i64_v4i64: +; AVX2-LABEL: arg_i64_v4i64_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovq %rdi, %xmm0 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_i64_v4i64_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 +; AVX512-NEXT: retq %ins = insertelement <4 x i64> undef, i64 %x, i32 %y ret <4 x i64> %ins } -define <8 x float> @arg_f32_v8f32(float %x, i32 %y) nounwind { -; SSE-LABEL: arg_f32_v8f32: +define <8 x float> @arg_f32_v8f32_undef(float %x, i32 %y) nounwind { +; SSE-LABEL: arg_f32_v8f32_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $edi killed $edi def $rdi ; SSE-NEXT: andl $7, %edi @@ -375,22 +489,27 @@ define <8 x float> @arg_f32_v8f32(float %x, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_f32_v8f32: +; AVX1-LABEL: arg_f32_v8f32_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_f32_v8f32: +; AVX2-LABEL: arg_f32_v8f32_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_f32_v8f32_undef: +; AVX512: # 
%bb.0: +; AVX512-NEXT: vbroadcastss %xmm0, %ymm0 +; AVX512-NEXT: retq %ins = insertelement <8 x float> undef, float %x, i32 %y ret <8 x float> %ins } -define <4 x double> @arg_f64_v4f64(double %x, i32 %y) nounwind { -; SSE-LABEL: arg_f64_v4f64: +define <4 x double> @arg_f64_v4f64_undef(double %x, i32 %y) nounwind { +; SSE-LABEL: arg_f64_v4f64_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $edi killed $edi def $rdi ; SSE-NEXT: andl $3, %edi @@ -399,22 +518,27 @@ define <4 x double> @arg_f64_v4f64(double %x, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: arg_f64_v4f64: +; AVX1-LABEL: arg_f64_v4f64_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: arg_f64_v4f64: +; AVX2-LABEL: arg_f64_v4f64_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: arg_f64_v4f64_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0 +; AVX512-NEXT: retq %ins = insertelement <4 x double> undef, double %x, i32 %y ret <4 x double> %ins } -define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind { -; SSE-LABEL: load_i8_v32i8: +define <32 x i8> @load_i8_v32i8_undef(i8* %p, i32 %y) nounwind { +; SSE-LABEL: load_i8_v32i8_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movb (%rdi), %al @@ -424,7 +548,7 @@ define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_i8_v32i8: +; AVX1-LABEL: load_i8_v32i8_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: movzbl (%rdi), %eax ; AVX1-NEXT: vmovd %eax, %xmm0 @@ -433,17 +557,22 @@ define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind { ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: load_i8_v32i8: +; AVX2-LABEL: load_i8_v32i8_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0 ; 
AVX2-NEXT: retq +; +; AVX512-LABEL: load_i8_v32i8_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastb (%rdi), %ymm0 +; AVX512-NEXT: retq %x = load i8, i8* %p %ins = insertelement <32 x i8> undef, i8 %x, i32 %y ret <32 x i8> %ins } -define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind { -; SSE-LABEL: load_i16_v16i16: +define <16 x i16> @load_i16_v16i16_undef(i16* %p, i32 %y) nounwind { +; SSE-LABEL: load_i16_v16i16_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movzwl (%rdi), %eax @@ -453,7 +582,7 @@ define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_i16_v16i16: +; AVX1-LABEL: load_i16_v16i16_undef: ; AVX1: # %bb.0: ; AVX1-NEXT: movzwl (%rdi), %eax ; AVX1-NEXT: vmovd %eax, %xmm0 @@ -462,17 +591,22 @@ define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind { ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: load_i16_v16i16: +; AVX2-LABEL: load_i16_v16i16_undef: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_i16_v16i16_undef: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0 +; AVX512-NEXT: retq %x = load i16, i16* %p %ins = insertelement <16 x i16> undef, i16 %x, i32 %y ret <16 x i16> %ins } -define <8 x i32> @load_i32_v8i32(i32* %p, i32 %y) nounwind { -; SSE-LABEL: load_i32_v8i32: +define <8 x i32> @load_i32_v8i32_undef(i32* %p, i32 %y) nounwind { +; SSE-LABEL: load_i32_v8i32_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movl (%rdi), %eax @@ -482,7 +616,7 @@ define <8 x i32> @load_i32_v8i32(i32* %p, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_i32_v8i32: +; AVX-LABEL: load_i32_v8i32_undef: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss (%rdi), %ymm0 ; AVX-NEXT: retq @@ -491,8 +625,8 @@ define <8 x i32> @load_i32_v8i32(i32* %p, i32 
%y) nounwind { ret <8 x i32> %ins } -define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind { -; SSE-LABEL: load_i64_v4i64: +define <4 x i64> @load_i64_v4i64_undef(i64* %p, i32 %y) nounwind { +; SSE-LABEL: load_i64_v4i64_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movq (%rdi), %rax @@ -502,7 +636,7 @@ define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_i64_v4i64: +; AVX-LABEL: load_i64_v4i64_undef: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 ; AVX-NEXT: retq @@ -511,8 +645,8 @@ define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind { ret <4 x i64> %ins } -define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind { -; SSE-LABEL: load_f32_v8f32: +define <8 x float> @load_f32_v8f32_undef(float* %p, i32 %y) nounwind { +; SSE-LABEL: load_f32_v8f32_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -522,7 +656,7 @@ define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_f32_v8f32: +; AVX-LABEL: load_f32_v8f32_undef: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss (%rdi), %ymm0 ; AVX-NEXT: retq @@ -531,8 +665,8 @@ define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind { ret <8 x float> %ins } -define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind { -; SSE-LABEL: load_f64_v4f64: +define <4 x double> @load_f64_v4f64_undef(double* %p, i32 %y) nounwind { +; SSE-LABEL: load_f64_v4f64_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -542,7 +676,7 @@ define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind { ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_f64_v4f64: +; AVX-LABEL: load_f64_v4f64_undef: ; AVX: # %bb.0: ; AVX-NEXT: 
vbroadcastsd (%rdi), %ymm0 ; AVX-NEXT: retq @@ -551,6 +685,670 @@ define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind { ret <4 x double> %ins } +; +; Insertion into arg vectors +; + +define <16 x i8> @arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i8_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $15, %esi +; SSE-NEXT: movb %dil, -24(%rsp,%rsi) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i8_v16i8: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $15, %esi +; AVX-NEXT: movb %dil, -24(%rsp,%rsi) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %ins = insertelement <16 x i8> %v, i8 %x, i32 %y + ret <16 x i8> %ins +} + +define <8 x i16> @arg_i16_v8i16(<8 x i16> %v, i16 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i16_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $7, %esi +; SSE-NEXT: movw %di, -24(%rsp,%rsi,2) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i16_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $7, %esi +; AVX-NEXT: movw %di, -24(%rsp,%rsi,2) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %ins = insertelement <8 x i16> %v, i16 %x, i32 %y + ret <8 x i16> %ins +} + +define <4 x i32> @arg_i32_v4i32(<4 x i32> %v, i32 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i32_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %esi +; SSE-NEXT: movl %edi, -24(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i32_v4i32: +; AVX: # %bb.0: +; 
AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $3, %esi +; AVX-NEXT: movl %edi, -24(%rsp,%rsi,4) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %ins = insertelement <4 x i32> %v, i32 %x, i32 %y + ret <4 x i32> %ins +} + +define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i64_v2i64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $1, %esi +; SSE-NEXT: movq %rdi, -24(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i64_v2i64: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $1, %esi +; AVX-NEXT: movq %rdi, -24(%rsp,%rsi,8) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %ins = insertelement <2 x i64> %v, i64 %x, i32 %y + ret <2 x i64> %ins +} + +define <4 x float> @arg_f32_v4f32(<4 x float> %v, float %x, i32 %y) nounwind { +; SSE-LABEL: arg_f32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $edi killed $edi def $rdi +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %edi +; SSE-NEXT: movss %xmm1, -24(%rsp,%rdi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_f32_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $edi killed $edi def $rdi +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $3, %edi +; AVX-NEXT: vmovss %xmm1, -24(%rsp,%rdi,4) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %ins = insertelement <4 x float> %v, float %x, i32 %y + ret <4 x float> %ins +} + +define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind { +; SSE-LABEL: arg_f64_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $edi killed $edi def $rdi +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $1, %edi +; SSE-NEXT: movsd 
%xmm1, -24(%rsp,%rdi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_f64_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $edi killed $edi def $rdi +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $1, %edi +; AVX-NEXT: vmovsd %xmm1, -24(%rsp,%rdi,8) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %ins = insertelement <2 x double> %v, double %x, i32 %y + ret <2 x double> %ins +} + +define <16 x i8> @load_i8_v16i8(<16 x i8> %v, i8* %p, i32 %y) nounwind { +; SSE-LABEL: load_i8_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movb (%rdi), %al +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $15, %esi +; SSE-NEXT: movb %al, -24(%rsp,%rsi) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i8_v16i8: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movb (%rdi), %al +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $15, %esi +; AVX-NEXT: movb %al, -24(%rsp,%rsi) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %x = load i8, i8* %p + %ins = insertelement <16 x i8> %v, i8 %x, i32 %y + ret <16 x i8> %ins +} + +define <8 x i16> @load_i16_v8i16(<8 x i16> %v, i16* %p, i32 %y) nounwind { +; SSE-LABEL: load_i16_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movzwl (%rdi), %eax +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $7, %esi +; SSE-NEXT: movw %ax, -24(%rsp,%rsi,2) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i16_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $7, %esi +; AVX-NEXT: movw %ax, -24(%rsp,%rsi,2) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %x = load i16, i16* %p + %ins = insertelement <8 x i16> %v, i16 %x, 
i32 %y + ret <8 x i16> %ins +} + +define <4 x i32> @load_i32_v4i32(<4 x i32> %v, i32* %p, i32 %y) nounwind { +; SSE-LABEL: load_i32_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movl (%rdi), %eax +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %esi +; SSE-NEXT: movl %eax, -24(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i32_v4i32: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movl (%rdi), %eax +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $3, %esi +; AVX-NEXT: movl %eax, -24(%rsp,%rsi,4) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %x = load i32, i32* %p + %ins = insertelement <4 x i32> %v, i32 %x, i32 %y + ret <4 x i32> %ins +} + +define <2 x i64> @load_i64_v2i64(<2 x i64> %v, i64* %p, i32 %y) nounwind { +; SSE-LABEL: load_i64_v2i64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $1, %esi +; SSE-NEXT: movq %rax, -24(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i64_v2i64: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $1, %esi +; AVX-NEXT: movq %rax, -24(%rsp,%rsi,8) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %x = load i64, i64* %p + %ins = insertelement <2 x i64> %v, i64 %x, i32 %y + ret <2 x i64> %ins +} + +define <4 x float> @load_f32_v4f32(<4 x float> %v, float* %p, i32 %y) nounwind { +; SSE-LABEL: load_f32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %esi +; SSE-NEXT: movss %xmm1, -24(%rsp,%rsi,4) +; SSE-NEXT: movaps 
-{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: load_f32_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $3, %esi +; AVX-NEXT: vmovss %xmm1, -24(%rsp,%rsi,4) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %x = load float, float* %p + %ins = insertelement <4 x float> %v, float %x, i32 %y + ret <4 x float> %ins +} + +define <2 x double> @load_f64_v2f64(<2 x double> %v, double* %p, i32 %y) nounwind { +; SSE-LABEL: load_f64_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $1, %esi +; SSE-NEXT: movsd %xmm1, -24(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: load_f64_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: andl $1, %esi +; AVX-NEXT: vmovsd %xmm1, -24(%rsp,%rsi,8) +; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX-NEXT: retq + %x = load double, double* %p + %ins = insertelement <2 x double> %v, double %x, i32 %y + ret <2 x double> %ins +} + +define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i8_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $31, %esi +; SSE-NEXT: movb %dil, -40(%rsp,%rsi) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i8_v32i8: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; 
AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $31, %esi +; AVX-NEXT: movb %dil, (%rsp,%rsi) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %ins = insertelement <32 x i8> %v, i8 %x, i32 %y + ret <32 x i8> %ins +} + +define <16 x i16> @arg_i16_v16i16(<16 x i16> %v, i16 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i16_v16i16: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $15, %esi +; SSE-NEXT: movw %di, -40(%rsp,%rsi,2) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i16_v16i16: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $15, %esi +; AVX-NEXT: movw %di, (%rsp,%rsi,2) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %ins = insertelement <16 x i16> %v, i16 %x, i32 %y + ret <16 x i16> %ins +} + +define <8 x i32> @arg_i32_v8i32(<8 x i32> %v, i32 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i32_v8i32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $7, %esi +; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i32_v8i32: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $7, %esi +; AVX-NEXT: movl %edi, (%rsp,%rsi,4) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; 
AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %ins = insertelement <8 x i32> %v, i32 %x, i32 %y + ret <8 x i32> %ins +} + +define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { +; SSE-LABEL: arg_i64_v4i64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %esi +; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_i64_v4i64: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $3, %esi +; AVX-NEXT: movq %rdi, (%rsp,%rsi,8) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %ins = insertelement <4 x i64> %v, i64 %x, i32 %y + ret <4 x i64> %ins +} + +define <8 x float> @arg_f32_v8f32(<8 x float> %v, float %x, i32 %y) nounwind { +; SSE-LABEL: arg_f32_v8f32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $edi killed $edi def $rdi +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $7, %edi +; SSE-NEXT: movss %xmm2, -40(%rsp,%rdi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_f32_v8f32: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $edi killed $edi def $rdi +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $7, %edi +; AVX-NEXT: vmovss %xmm1, (%rsp,%rdi,4) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %ins = insertelement <8 x float> %v, float %x, i32 %y + ret 
<8 x float> %ins +} + +define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind { +; SSE-LABEL: arg_f64_v4f64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $edi killed $edi def $rdi +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %edi +; SSE-NEXT: movsd %xmm2, -40(%rsp,%rdi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: arg_f64_v4f64: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $edi killed $edi def $rdi +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $3, %edi +; AVX-NEXT: vmovsd %xmm1, (%rsp,%rdi,8) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %ins = insertelement <4 x double> %v, double %x, i32 %y + ret <4 x double> %ins +} + +define <32 x i8> @load_i8_v32i8(<32 x i8> %v, i8* %p, i32 %y) nounwind { +; SSE-LABEL: load_i8_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movb (%rdi), %al +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $31, %esi +; SSE-NEXT: movb %al, -40(%rsp,%rsi) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i8_v32i8: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movb (%rdi), %al +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $31, %esi +; AVX-NEXT: movb %al, (%rsp,%rsi) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %x = load i8, i8* %p + %ins = insertelement <32 x i8> %v, i8 %x, i32 %y + ret <32 x i8> %ins +} + +define <16 x 
i16> @load_i16_v16i16(<16 x i16> %v, i16* %p, i32 %y) nounwind { +; SSE-LABEL: load_i16_v16i16: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movzwl (%rdi), %eax +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $15, %esi +; SSE-NEXT: movw %ax, -40(%rsp,%rsi,2) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i16_v16i16: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $15, %esi +; AVX-NEXT: movw %ax, (%rsp,%rsi,2) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %x = load i16, i16* %p + %ins = insertelement <16 x i16> %v, i16 %x, i32 %y + ret <16 x i16> %ins +} + +define <8 x i32> @load_i32_v8i32(<8 x i32> %v, i32* %p, i32 %y) nounwind { +; SSE-LABEL: load_i32_v8i32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movl (%rdi), %eax +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $7, %esi +; SSE-NEXT: movl %eax, -40(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i32_v8i32: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movl (%rdi), %eax +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $7, %esi +; AVX-NEXT: movl %eax, (%rsp,%rsi,4) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %x = load i32, i32* %p + %ins = insertelement <8 x i32> %v, 
i32 %x, i32 %y + ret <8 x i32> %ins +} + +define <4 x i64> @load_i64_v4i64(<4 x i64> %v, i64* %p, i32 %y) nounwind { +; SSE-LABEL: load_i64_v4i64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %esi +; SSE-NEXT: movq %rax, -40(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: load_i64_v4i64: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $3, %esi +; AVX-NEXT: movq %rax, (%rsp,%rsi,8) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %x = load i64, i64* %p + %ins = insertelement <4 x i64> %v, i64 %x, i32 %y + ret <4 x i64> %ins +} + +define <8 x float> @load_f32_v8f32(<8 x float> %v, float* %p, i32 %y) nounwind { +; SSE-LABEL: load_f32_v8f32: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $7, %esi +; SSE-NEXT: movss %xmm2, -40(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: load_f32_v8f32: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $7, %esi +; AVX-NEXT: vmovss %xmm1, (%rsp,%rsi,4) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, 
%rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %x = load float, float* %p + %ins = insertelement <8 x float> %v, float %x, i32 %y + ret <8 x float> %ins +} + +define <4 x double> @load_f64_v4f64(<4 x double> %v, double* %p, i32 %y) nounwind { +; SSE-LABEL: load_f64_v4f64: +; SSE: # %bb.0: +; SSE-NEXT: # kill: def $esi killed $esi def $rsi +; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: andl $3, %esi +; SSE-NEXT: movsd %xmm2, -40(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: load_f64_v4f64: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $3, %esi +; AVX-NEXT: vmovsd %xmm1, (%rsp,%rsi,8) +; AVX-NEXT: vmovaps (%rsp), %ymm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq + %x = load double, double* %p + %ins = insertelement <4 x double> %v, double %x, i32 %y + ret <4 x double> %ins +} + ; Don't die trying to insert to an invalid index. define i32 @PR44139(<16 x i64>* %p) { From b6e4aae2cc269bbdce9263cd49dcd582840b333a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 21 Oct 2020 10:11:20 -0700 Subject: [PATCH 069/179] [ELF] --gc-sections: retain dependent sections of non-SHF_ALLOC sections Fix http://lists.llvm.org/pipermail/llvm-dev/2020-October/145908.html Currently non-SHF_ALLOC SHT_REL[A] (due to --emit-relocs) and SHF_LINK_ORDER are not marked live. 
Reviewed By: grimar, psmith Differential Revision: https://reviews.llvm.org/D89841 --- lld/ELF/MarkLive.cpp | 21 ++++++++++++--------- lld/test/ELF/gc-sections-linkorder2.s | 6 ++++++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index af6c08c215816..fe5d32c80bfea 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -339,16 +339,16 @@ template void elf::markLive() { // Otherwise, do mark-sweep GC. // - // The -gc-sections option works only for SHF_ALLOC sections - // (sections that are memory-mapped at runtime). So we can - // unconditionally make non-SHF_ALLOC sections alive except - // SHF_LINK_ORDER and SHT_REL/SHT_RELA sections. + // The -gc-sections option works only for SHF_ALLOC sections (sections that + // are memory-mapped at runtime). So we can unconditionally make non-SHF_ALLOC + // sections alive except SHF_LINK_ORDER, SHT_REL/SHT_RELA sections, and + // sections in a group. // // Usually, non-SHF_ALLOC sections are not removed even if they are - // unreachable through relocations because reachability is not - // a good signal whether they are garbage or not (e.g. there is - // usually no section referring to a .comment section, but we - // want to keep it.). + // unreachable through relocations because reachability is not a good signal + // whether they are garbage or not (e.g. there is usually no section referring + // to a .comment section, but we want to keep it.) When a non-SHF_ALLOC + // section is retained, we also retain sections dependent on it. // // Note on SHF_LINK_ORDER: Such sections contain metadata and they // have a reverse dependency on the InputSection they are linked with. 
@@ -370,8 +370,11 @@ template void elf::markLive() { bool isLinkOrder = (sec->flags & SHF_LINK_ORDER); bool isRel = (sec->type == SHT_REL || sec->type == SHT_RELA); - if (!isAlloc && !isLinkOrder && !isRel && !sec->nextInSectionGroup) + if (!isAlloc && !isLinkOrder && !isRel && !sec->nextInSectionGroup) { sec->markLive(); + for (InputSection *isec : sec->dependentSections) + isec->markLive(); + } } // Follow the graph to mark all live sections. diff --git a/lld/test/ELF/gc-sections-linkorder2.s b/lld/test/ELF/gc-sections-linkorder2.s index b22bc589f56ea..2024972b3937e 100644 --- a/lld/test/ELF/gc-sections-linkorder2.s +++ b/lld/test/ELF/gc-sections-linkorder2.s @@ -14,3 +14,9 @@ _start: .quad 0 .section .zed,"ao",@progbits,.foo .quad 0 + +.section .nonalloc +.quad 0 + +.section .nonalloc_linkorder,"o",@progbits,.nonalloc +.quad 0 From 611959f004d78f99e45fdc940a7a4322b85d04d9 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 16 Oct 2020 15:22:42 -0700 Subject: [PATCH 070/179] [AMDGPU] Fixed v_swap_b32 match 1. Fixed liveness issue with implicit kills. 2. Fixed potential problem with an indirect mov. 
Fixes: SWDEV-256848 Differential Revision: https://reviews.llvm.org/D89599 --- .../Target/AMDGPU/SIShrinkInstructions.cpp | 62 ++++- llvm/test/CodeGen/AMDGPU/v_swap_b32.mir | 223 +++++++++++++++++- 2 files changed, 275 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 94ade095e7a90..29cb1001fcad3 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -437,6 +437,22 @@ getSubRegForIndex(Register Reg, unsigned Sub, unsigned I, return TargetInstrInfo::RegSubRegPair(Reg, Sub); } +static void dropInstructionKeepingImpDefs(MachineInstr &MI, + const SIInstrInfo *TII) { + for (unsigned i = MI.getDesc().getNumOperands() + + MI.getDesc().getNumImplicitUses() + + MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands(); + i != e; ++i) { + const MachineOperand &Op = MI.getOperand(i); + if (!Op.isDef()) + continue; + BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), + TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg()); + } + + MI.eraseFromParent(); +} + // Match: // mov t, x // mov x, y @@ -476,18 +492,25 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, if (!TRI.isVGPR(MRI, X)) return nullptr; + if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0)) + return nullptr; + const unsigned SearchLimit = 16; unsigned Count = 0; + bool KilledT = false; for (auto Iter = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end(); - Iter != E && Count < SearchLimit; ++Iter, ++Count) { + Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) { MachineInstr *MovY = &*Iter; + KilledT = MovY->killsRegister(T, &TRI); + if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && MovY->getOpcode() != AMDGPU::COPY) || !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T || - MovY->getOperand(1).getSubReg() != Tsub) + MovY->getOperand(1).getSubReg() != Tsub || + 
MovY->hasRegisterImplicitUseOperand(AMDGPU::M0)) continue; Register Y = MovY->getOperand(0).getReg(); @@ -521,32 +544,53 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, MovX = nullptr; break; } + // Implicit use of M0 is an indirect move. + if (I->hasRegisterImplicitUseOperand(AMDGPU::M0)) + continue; + + if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0 : 1))) + continue; + MovX = &*I; } if (!MovX) continue; - LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY); + LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY); for (unsigned I = 0; I < Size; ++I) { TargetInstrInfo::RegSubRegPair X1, Y1; X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI); Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI); - BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(), - TII->get(AMDGPU::V_SWAP_B32)) + MachineBasicBlock &MBB = *MovT.getParent(); + auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(), + TII->get(AMDGPU::V_SWAP_B32)) .addDef(X1.Reg, 0, X1.SubReg) .addDef(Y1.Reg, 0, Y1.SubReg) .addReg(Y1.Reg, 0, Y1.SubReg) .addReg(X1.Reg, 0, X1.SubReg).getInstr(); + if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { + // Drop implicit EXEC. 
+ MIB->RemoveOperand(MIB->getNumExplicitOperands()); + MIB->copyImplicitOps(*MBB.getParent(), *MovX); + } } MovX->eraseFromParent(); - MovY->eraseFromParent(); + dropInstructionKeepingImpDefs(*MovY, TII); MachineInstr *Next = &*std::next(MovT.getIterator()); - if (MRI.use_nodbg_empty(T)) - MovT.eraseFromParent(); - else + + if (MRI.use_nodbg_empty(T)) { + dropInstructionKeepingImpDefs(MovT, TII); + } else { Xop.setIsKill(false); + for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) { + unsigned OpNo = MovT.getNumExplicitOperands() + I; + const MachineOperand &Op = MovT.getOperand(OpNo); + if (Op.isKill() && TRI.regsOverlap(X, Op.getReg())) + MovT.RemoveOperand(OpNo); + } + } return Next; } diff --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir index 3190641ae6910..d557060207df5 100644 --- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir +++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir @@ -517,7 +517,9 @@ body: | ... # GCN-LABEL: name: swap_virt_copy_subreg_impdef_super -# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +# GCN: %2:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1 +# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec --- name: swap_virt_copy_subreg_impdef_super registers: @@ -672,3 +674,222 @@ body: | %1 = COPY %2 S_ENDPGM 0 ... 
+ +# GCN-LABEL: name: swap_liveness_error_mov +# GCN: $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF +# GCN-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7 +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec + +--- +name: swap_liveness_error_mov +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr5, $vgpr1_vgpr2 + + $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2 + $vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec + $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7 + $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7 + $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: swap_liveness_error_copy +# GCN: $vgpr6 = COPY $vgpr1 +# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF +# GCN-NEXT: $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7 +# GCN-NEXT: $vgpr5 = COPY $vgpr6 + +--- +name: swap_liveness_error_copy +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr5, $vgpr1_vgpr2 + + $vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2 + $vgpr1 = COPY killed $vgpr5 + $vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7 + $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7 + $vgpr5 = COPY $vgpr6 + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: swap_killed_t_early +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2 +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + +--- +name: swap_killed_t_early +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2 + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec + $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: swap_killed_t_late +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + +--- +name: swap_killed_t_late +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2 + $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +# GCN-LABEL: name: swap_killed_x +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + +--- +name: swap_killed_x +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0 + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: indirect_mov_t +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0 +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + +--- +name: indirect_mov_t +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0 + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +# GCN-LABEL: name: indirect_mov_x +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + +--- +name: indirect_mov_x +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0 + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: indirect_mov_y +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0 +# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + +--- +name: indirect_mov_y +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: implicit_ops_mov_x_swap_b32 +# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2 + +--- +name: implicit_ops_mov_x_swap_b32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2 + $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +# GCN-LABEL: name: implict_ops_mov_x_swap_b64 +# GCN: %2:vreg_64 = COPY %0 +# GCN-NEXT: %0:vreg_64 = COPY %1, implicit $vgpr0 +# GCN-NEXT: %1:vreg_64 = COPY %2 + +--- +name: implict_ops_mov_x_swap_b64 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1, implicit $vgpr0 + %1 = COPY %2 +... + +# GCN-LABEL: implicit_ops_mov_t_swap_b32 +# GCN: $vgpr1 = IMPLICIT_DEF +# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec + +--- +name: implicit_ops_mov_t_swap_b32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2, implicit-def $vgpr1 + $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: implicit_ops_mov_y_swap_b32 +# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF + +--- +name: implicit_ops_mov_y_swap_b32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec, implicit $vgpr2, implicit-def $vgpr0_vgpr1, implicit killed $vgpr3 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... From 6b66f1cd9ba4533904a733329890e911ae0f5f6f Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Mon, 19 Oct 2020 11:01:13 -0700 Subject: [PATCH 071/179] [flang] Another validity of the TARGET= argument of ASSOCIATED() for objects In my previous implementation of the semantic checks for ASSOCIATED(), I had neglected to check the TARGET= argument for objects to ensure that it has either the POINTER or TARGET attributes. I added an implementation and a test. 
Differential Revision: https://reviews.llvm.org/D89717 --- flang/lib/Evaluate/intrinsics.cpp | 13 +++++++++++++ flang/test/Semantics/associated.f90 | 5 ++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 83fdc76c9bcd6..eaff9620a22dd 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1959,6 +1959,19 @@ static bool CheckAssociated(SpecificCall &call, *pointerSymbol); } else { // object pointer and target + if (const Symbol * targetSymbol{GetLastSymbol(*targetExpr)}) { + if (!(targetSymbol->attrs().test(semantics::Attr::POINTER) || + targetSymbol->attrs().test( + semantics::Attr::TARGET))) { + AttachDeclaration( + messages.Say("TARGET= argument '%s' must have either " + "the POINTER or the TARGET " + "attribute"_err_en_US, + targetName), + *targetSymbol); + } + } + if (const auto pointerType{pointerArg->GetType()}) { if (const auto targetType{targetArg->GetType()}) { ok = pointerType->IsTkCompatibleWith(*targetType); diff --git a/flang/test/Semantics/associated.f90 b/flang/test/Semantics/associated.f90 index b78ccb017b162..641b7d97d78af 100644 --- a/flang/test/Semantics/associated.f90 +++ b/flang/test/Semantics/associated.f90 @@ -74,7 +74,6 @@ subroutine test() lVar = associated(intVar, intVar) !ERROR: POINTER= argument of ASSOCIATED() must be a POINTER lVar = associated(intAllocVar) - lVar = associated(intPointerVar1, intVar) !OK !ERROR: Arguments of ASSOCIATED() must be a POINTER and an optional valid target lVar = associated(intPointerVar1, targetRealVar) lVar = associated(intPointerVar1, targetIntVar1) !OK @@ -82,6 +81,10 @@ subroutine test() lVar = associated(intPointerVar1, targetIntVar2) lVar = associated(intPointerVar1) !OK lVar = associated(intPointerVar1, intPointerVar2) !OK + !ERROR: In assignment to object pointer 'intpointervar1', the target 'intvar' is not an object with POINTER or TARGET attributes + intPointerVar1 => 
intVar + !ERROR: TARGET= argument 'intvar' must have either the POINTER or the TARGET attribute + lVar = associated(intPointerVar1, intVar) ! Procedure pointer tests intprocPointer1 => intProc !OK From cf11f017af32a797e301f56b163a989ad73630fa Mon Sep 17 00:00:00 2001 From: Xiangling Liao Date: Wed, 21 Oct 2020 13:34:56 -0400 Subject: [PATCH 072/179] [NFC] Fix the definition of SuitableAlign --- clang/include/clang/Basic/TargetInfo.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 7253b5ea9abed..26dc6eacb2041 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -581,8 +581,9 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Determine whether constrained floating point is supported on this target. virtual bool hasStrictFP() const { return HasStrictFP; } - /// Return the alignment that is suitable for storing any - /// object with a fundamental alignment requirement. + /// Return the alignment that is the largest alignment ever used for any + /// scalar/SIMD data type on the target machine you are compiling for + /// (including types with an extended alignment requirement). 
unsigned getSuitableAlign() const { return SuitableAlign; } /// Return the default alignment for __attribute__((aligned)) on From 817cd3d191be44067c1d037729e0cdfe89e35a77 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 8 Oct 2020 20:05:18 -0400 Subject: [PATCH 073/179] Fix missing c++ mode comment --- llvm/include/llvm/LTO/Config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 0a3e523164601..a305778a305b0 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -1,4 +1,4 @@ -//===-Config.h - LLVM Link Time Optimizer Configuration -------------------===// +//===-Config.h - LLVM Link Time Optimizer Configuration ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From bf9edcb6fda7e19487c2dca605a95e8a6779a80a Mon Sep 17 00:00:00 2001 From: Evgeny Leviant Date: Wed, 21 Oct 2020 20:49:10 +0300 Subject: [PATCH 074/179] [ARM][SchedModels] Convert IsLdrAm3RegOffPred to MCSchedPredicate Differential revision: https://reviews.llvm.org/D89876 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 6 ------ llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 1 - llvm/lib/Target/ARM/ARMScheduleA57.td | 11 +++-------- .../llvm-mca/ARM/cortex-a57-memory-instructions.s | 2 +- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3822f9057d949..0632e1be2fc97 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -612,12 +612,6 @@ bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { return false; } -bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, - unsigned Op) const { - const MachineOperand &Offset = MI.getOperand(Op + 1); - return Offset.getReg() != 0; -} - // Load with negative register 
offset requires additional 1cyc and +I unit // for Cortex A57 bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index f997322107afb..f894a85c914f3 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -178,7 +178,6 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { // CPSR defined in instruction static bool isCPSRDefined(const MachineInstr &MI); - bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const; bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const; // Load, scaled register offset diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td index 3663b9d7d653a..1a9de1d9fb789 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA57.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -28,14 +28,9 @@ def IsCPSRDefinedAndPredicatedPred : // Cortex A57 rev. r1p0 or later (false = r0px) def IsR1P0AndLaterPred : SchedPredicate<[{false}]>; -// If Addrmode3 contains register offset (not immediate) -def IsLdrAm3RegOffPred : - SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>; -// The same predicate with operand offset 2 and 3: -def IsLdrAm3RegOffPredX2 : - SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>; -def IsLdrAm3RegOffPredX3 : - SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>; +def IsLdrAm3RegOffPred : MCSchedPredicate>; +def IsLdrAm3RegOffPredX2 : MCSchedPredicate>; +def IsLdrAm3RegOffPredX3 : MCSchedPredicate>; // If Addrmode3 contains "minus register" def IsLdrAm3NegRegOffPred : diff --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s index a0c99f77720e8..96fd4206a41d0 100644 --- a/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s +++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s @@ -180,7 +180,7 @@ # CHECK-NEXT: 2 4 1.00 * ldrbt r1, [r2], 
-r6, lsl #12 # CHECK-NEXT: 2 4 2.00 * ldrd r0, r1, [r5] # CHECK-NEXT: 2 4 2.00 * ldrd r8, r9, [r2, #15] -# CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r9, #32]! +# CHECK-NEXT: 4 5 2.00 * ldrd r2, r3, [r9, #32]! # CHECK-NEXT: 4 4 2.00 * ldrd r6, r7, [r1], #8 # CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r8], #0 # CHECK-NEXT: 4 4 2.00 * ldrd r2, r3, [r8], #0 From 26790ed248870a1e293e844945bf677825a43084 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 21 Oct 2020 18:52:53 +0100 Subject: [PATCH 075/179] [libomptarget] Require LLVM source tree to build libomptarget [libomptarget] Require LLVM source tree to build libomptarget This is to permit reliably #including files from the LLVM tree in libomptarget, as an improvement on the copy and paste that is currently in use. See D87841 for the first example of removing duplication given this new requirement. The weekly openmp dev call reached consensus on this approach. See also D87841 for some alternatives that were considered. In the future, we may want to introduce a new top level repo for shared constants, or start using the ADT library within openmp. This will break sufficiently exotic build systems, trivial fixes as below. Building libomptarget as part of the monorepo will continue to work. If openmp is built separately, it now requires a cmake macro indicating where to find the LLVM source tree. If openmp is built separately, without the llvm source tree already on disk, the build machine will need a copy of a subset of the llvm source tree and the cmake macro indicating where it is. 
Reviewed By: protze.joachim Differential Revision: https://reviews.llvm.org/D89426 --- openmp/CMakeLists.txt | 14 ++++++++++++++ openmp/libomptarget/CMakeLists.txt | 5 +++++ openmp/libomptarget/plugins/amdgpu/CMakeLists.txt | 6 +++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 51f496eff1bff..6cc36d9b75773 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -66,6 +66,20 @@ if (APPLE OR WIN32 OR NOT OPENMP_HAVE_STD_CPP14_FLAG) set(ENABLE_LIBOMPTARGET OFF) endif() +# Attempt to locate LLVM source, required by libomptarget +if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) + if (LLVM_MAIN_INCLUDE_DIR) + set(LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_INCLUDE_DIR}) + elseif (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) + set(LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) + endif() +endif() + +if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) + message(STATUS "Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR, disabling libomptarget") + set(ENABLE_LIBOMPTARGET OFF) +endif() + option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) if (OPENMP_ENABLE_LIBOMPTARGET) diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt index c1bc29faaf45d..7ef0bafdf3c67 100644 --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -29,6 +29,11 @@ include(LibomptargetUtils) # Get dependencies for the different components of the project. include(LibomptargetGetDependencies) +# LLVM source tree is required at build time for libomptarget +if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) + message(FATAL_ERROR "Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR") +endif() + # This is a list of all the targets that are supported/tested right now. 
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} aarch64-unknown-linux-gnu") set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64le-ibm-linux-gnu") diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt index 3882b777f5b1e..0c50ffdf2fa6e 100644 --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -30,8 +30,8 @@ if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_ return() endif() -if (NOT LLVM_MAIN_INCLUDE_DIR) - libomptarget_say("Not building AMDGPU plugin: Missing definition for LLVM_MAIN_INCLUDE_DIR") +if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) + libomptarget_say("Not building AMDGPU plugin: Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR") return() endif() @@ -50,7 +50,7 @@ endif() include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/impl - ${LLVM_MAIN_INCLUDE_DIR} + ${LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR} ) add_library(omptarget.rtl.amdgpu SHARED From 958abe01802ca33d506e54f88f6a2563b95154f5 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 7 Oct 2020 19:35:39 -0700 Subject: [PATCH 076/179] [NFC] Clean up always false variables Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D89023 --- llvm/include/llvm/Analysis/RegionPass.h | 2 - llvm/lib/Analysis/RegionPass.cpp | 50 +++++++------------------ 2 files changed, 14 insertions(+), 38 deletions(-) diff --git a/llvm/include/llvm/Analysis/RegionPass.h b/llvm/include/llvm/Analysis/RegionPass.h index 995c5dca3de3b..5c7fa5f56693a 100644 --- a/llvm/include/llvm/Analysis/RegionPass.h +++ b/llvm/include/llvm/Analysis/RegionPass.h @@ -85,8 +85,6 @@ class RegionPass : public Pass { /// The pass manager to schedule RegionPasses. 
class RGPassManager : public FunctionPass, public PMDataManager { std::deque RQ; - bool skipThisRegion; - bool redoThisRegion; RegionInfo *RI; Region *CurrentRegion; diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp index ace682465abba..a73607dbef61b 100644 --- a/llvm/lib/Analysis/RegionPass.cpp +++ b/llvm/lib/Analysis/RegionPass.cpp @@ -31,8 +31,6 @@ char RGPassManager::ID = 0; RGPassManager::RGPassManager() : FunctionPass(ID), PMDataManager() { - skipThisRegion = false; - redoThisRegion = false; RI = nullptr; CurrentRegion = nullptr; } @@ -76,8 +74,6 @@ bool RGPassManager::runOnFunction(Function &F) { while (!RQ.empty()) { CurrentRegion = RQ.back(); - skipThisRegion = false; - redoThisRegion = false; // Run all passes on the current Region. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { @@ -115,54 +111,36 @@ bool RGPassManager::runOnFunction(Function &F) { if (isPassDebuggingExecutionsOrMore()) { if (LocalChanged) dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG, - skipThisRegion ? "" : CurrentRegion->getNameStr()); dumpPreservedSet(P); } - if (!skipThisRegion) { - // Manually check that this region is still healthy. This is done - // instead of relying on RegionInfo::verifyRegion since RegionInfo - // is a function pass and it's really expensive to verify every - // Region in the function every time. That level of checking can be - // enabled with the -verify-region-info option. - { - TimeRegion PassTimer(getPassTimer(P)); - CurrentRegion->verifyRegion(); - } - - // Then call the regular verifyAnalysis functions. - verifyPreservedAnalysis(P); + // Manually check that this region is still healthy. This is done + // instead of relying on RegionInfo::verifyRegion since RegionInfo + // is a function pass and it's really expensive to verify every + // Region in the function every time. That level of checking can be + // enabled with the -verify-region-info option. 
+ { + TimeRegion PassTimer(getPassTimer(P)); + CurrentRegion->verifyRegion(); } + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(P); + if (LocalChanged) removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, - (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ? - "" : CurrentRegion->getNameStr(), + (!isPassDebuggingExecutionsOrMore()) + ? "" + : CurrentRegion->getNameStr(), ON_REGION_MSG); - - if (skipThisRegion) - // Do not run other passes on this region. - break; } - // If the region was deleted, release all the region passes. This frees up - // some memory, and avoids trouble with the pass manager trying to call - // verifyAnalysis on them. - if (skipThisRegion) - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - Pass *P = getContainedPass(Index); - freePass(P, "", ON_REGION_MSG); - } - // Pop the region from queue after running all passes. RQ.pop_back(); - if (redoThisRegion) - RQ.push_back(CurrentRegion); - // Free all region nodes created in region passes. RI->clearNodeCache(); } From b3ca53e14274642274be8fe7db8b43dc3c146366 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 21 Oct 2020 14:11:16 -0400 Subject: [PATCH 077/179] [gn build] try to fix clang build after 37c030f81a9fdd 37c030f81a9fdd made it so that depending on //libcxx/include automatically added the copied header dir to the include search path. For some reason, clang can't build against the copied libcxx headers (it complains about ldiv_t not being a type). I don't have a mac to debug right now, but for the clang target this change was unintentional anyways -- only depend on the copies target, instead of on the target that also adjusts the include path. 
--- llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn | 2 +- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn index a91ee25224d85..24a95e1968bec 100644 --- a/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn @@ -86,7 +86,7 @@ executable("clang") { # clang. This is different from the CMake build, which requires devs to # explicitly build the "libcxx" target (which also needlessly compiles the # libcxx sources) to get a working compiler. - deps += [ "//libcxx/include" ] + deps += [ "//libcxx/include:copy_headers" ] } sources = [ "cc1_main.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 2695a58dff54b..1a59c562c79ff 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -217,7 +217,7 @@ if (current_toolchain == default_toolchain) { "wchar.h", "wctype.h", ] - deps = [] + deps = [ ":write_config_site" ] if (target_os != "mac" && target_os != "win") { # libcxx/cmake/Modules/HandleLibCXXABI.cmake sets # LIBCXX_CXX_ABI_HEADER_TARGET if the libcxx abi library either of @@ -264,7 +264,6 @@ config("include_config") { group("include") { deps = [ ":copy_headers($default_toolchain)", - ":write_config_site($default_toolchain)", ] public_configs = [ ":include_config" ] } From cb319b1b5d9346a67c2f2a829d3dace58dff5861 Mon Sep 17 00:00:00 2001 From: Saiyedul Islam Date: Wed, 21 Oct 2020 18:20:16 +0000 Subject: [PATCH 078/179] [NFC][OpenMP] Update description of OMPGridValues enums Update comments describing how OMPGridValues enums will be used in clang, deviceRTLs, and hsa and cuda plugins. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D86232 --- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h index 41827181e30cf..6b48cc447e131 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -28,29 +28,30 @@ namespace omp { /// use the new array name. /// /// Example usage in clang: -/// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size); +/// const unsigned slot_size = +/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size); /// /// Example usage in libomptarget/deviceRTLs: -/// #include "OMPGridValues.h" +/// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #ifdef __AMDGPU__ /// #define GRIDVAL AMDGPUGpuGridValues /// #else /// #define GRIDVAL NVPTXGpuGridValues /// #endif /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. -/// GRIDVAL[GV_Warp_Size] +/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] /// /// Example usage in libomptarget hsa plugin: -/// #include "OMPGridValues.h" +/// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #define GRIDVAL AMDGPUGpuGridValues /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. -/// GRIDVAL[GV_Warp_Size] +/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] /// /// Example usage in libomptarget cuda plugin: -/// #include "OMPGridValues.h" +/// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #define GRIDVAL NVPTXGpuGridValues /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. -/// GRIDVAL[GV_Warp_Size] +/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] /// enum GVIDX { /// The maximum number of workers in a kernel. 
From 729610a51a4581ea923e6fb2ca1bfa7287022ec7 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 21 Oct 2020 14:10:14 -0400 Subject: [PATCH 079/179] [ARM] add cost-kind tests for intrinsics; NFC This is a copy of the x86 file to provide better coverage; x86 may have strange overrides that mask changes in the generic model. --- .../CostModel/ARM/intrinsic-cost-kinds.ll | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll new file mode 100644 index 0000000000000..9e02d6f149ca4 --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -0,0 +1,246 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=throughput < %s | FileCheck %s --check-prefix=THRU +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=latency < %s | FileCheck %s --check-prefix=LATE +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE + +; Test a cross-section of intrinsics for various cost-kinds. +; Other test files may check for accuracy of a particular intrinsic +; across subtargets or types. This is just a sanity check using an +; ARM target and a legal scalar type (i32/float) and/or an +; illegal vector type (16 x i32/float). 
+ +declare i32 @llvm.smax.i32(i32, i32) +declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) + +declare float @llvm.fmuladd.f32(float, float, float) +declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) + +declare i32 @llvm.cttz.i32(i32, i1) +declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) + +declare i32 @llvm.ctlz.i32(i32, i1) +declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) + +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>) +declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) + +define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) { +; THRU-LABEL: 'smax' +; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; THRU-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'smax' +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'smax' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'smax' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret void +} + +define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) { +; THRU-LABEL: 'fmuladd' +; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'fmuladd' +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'fmuladd' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'fmuladd' +; SIZE_LATE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) + ret void +} + +define void @cttz(i32 %a, <16 x i32> %va) { +; THRU-LABEL: 'cttz' +; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; THRU-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'cttz' +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'cttz' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'cttz' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; SIZE_LATE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) + %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) + ret void +} + +define void @ctlz(i32 %a, <16 x i32> %va) { +; THRU-LABEL: 'ctlz' +; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; THRU-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'ctlz' +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'ctlz' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'ctlz' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) + %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) + ret void +} + +define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) { +; THRU-LABEL: 'fshl' +; THRU-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'fshl' +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'fshl' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'fshl' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) + ret void +} + +define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { +; THRU-LABEL: 'maskedgather' +; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; 
THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'maskedgather' +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'maskedgather' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'maskedgather' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) + ret void +} + +define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { +; THRU-LABEL: 'maskedscatter' +; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'maskedscatter' +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'maskedscatter' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> 
%vb, i32 1, <16 x i1> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'maskedscatter' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) + ret void +} + +define void @reduce_fmax(<16 x float> %va) { +; THRU-LABEL: 'reduce_fmax' +; THRU-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'reduce_fmax' +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'reduce_fmax' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'reduce_fmax' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) + ret void +} + +define void @memcpy(i8* %a, i8* %b, i32 %c) { +; THRU-LABEL: 'memcpy' +; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
void +; +; LATE-LABEL: 'memcpy' +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'memcpy' +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'memcpy' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) + ret void +} From c963bde0152a741d58b9e1e9ac485d8f2e6c6d58 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 21 Oct 2020 14:19:52 -0400 Subject: [PATCH 080/179] [CostModel] remove cost-kind predicate for scatter/gather cost This is similar in spirit to 01ea93d85d6e (memcpy) except that here the underlying caller assumptions were created for vectorizer use (throughput) rather than other passes. That meant ARM could have an enormous throughput cost with no corresponding size, latency, or blended cost increase. X86 has the same throughput restriction as the basic implementation, so it is still unchanged. Paraphrasing from the previous commit: This may not make sense for some callers, but at least now the costs will be consistently wrong instead of mysteriously wrong. Targets should provide better overrides if the current modeling is not accurate. 
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 6 ------ llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 2eec38bbcc5d9..817cabd6344e8 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1169,9 +1169,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return thisT()->getMemcpyCost(ICA.getInst()); case Intrinsic::masked_scatter: { - // FIXME: all cost kinds should default to the same thing? - if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getIntrinsicInstrCost(ICA, CostKind); assert(VF == 1 && "Can't vectorize types here."); const Value *Mask = Args[3]; bool VarMask = !isa(Mask); @@ -1181,9 +1178,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { VarMask, Alignment, CostKind, I); } case Intrinsic::masked_gather: { - // FIXME: all cost kinds should default to the same thing? 
- if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getIntrinsicInstrCost(ICA, CostKind); assert(VF == 1 && "Can't vectorize types here."); const Value *Mask = Args[2]; bool VarMask = !isa(Mask); diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index 9e02d6f149ca4..bffaa98c82aa8 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -171,11 +171,11 @@ define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedgather' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'maskedgather' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) @@ -192,11 +192,11 @@ define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; 
SIZE-LABEL: 'maskedscatter' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'maskedscatter' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) From f0292ede9bbf8a24607c926b0439db20c203607a Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Thu, 15 Oct 2020 20:17:25 -0700 Subject: [PATCH 081/179] [mlir] Add structural type conversions for SCF dialect. A "structural" type conversion is one where the underlying ops are completely agnostic to the actual types involved and simply need to update their types. An example of this is scf.if -- the scf.if op and the corresponding scf.yield ops need to update their types accordingly to the TypeConverter, but otherwise don't care what type conversions are happening. To test the structural type conversions, it is convenient to define a bufferize pass for a dialect, which exercises them nicely. 
Differential Revision: https://reviews.llvm.org/D89757 --- mlir/include/mlir/Dialect/SCF/Passes.h | 3 + mlir/include/mlir/Dialect/SCF/Passes.td | 5 + mlir/include/mlir/Dialect/SCF/Transforms.h | 17 +++ mlir/include/mlir/Transforms/Bufferize.h | 9 ++ mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp | 41 ++++++ .../lib/Dialect/SCF/Transforms/CMakeLists.txt | 2 + .../Transforms/StructuralTypeConversions.cpp | 117 ++++++++++++++++++ mlir/lib/Transforms/Bufferize.cpp | 4 + mlir/test/Dialect/SCF/bufferize.mlir | 42 +++++++ 9 files changed, 240 insertions(+) create mode 100644 mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp create mode 100644 mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp create mode 100644 mlir/test/Dialect/SCF/bufferize.mlir diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h index 7edb2444e87c0..f3dda9bec335c 100644 --- a/mlir/include/mlir/Dialect/SCF/Passes.h +++ b/mlir/include/mlir/Dialect/SCF/Passes.h @@ -17,6 +17,9 @@ namespace mlir { +/// Creates a pass that bufferizes the SCF dialect. +std::unique_ptr createSCFBufferizePass(); + /// Creates a pass that specializes for loop for unrolling and /// vectorization. 
std::unique_ptr createForLoopSpecializationPass(); diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td index 6f3cf0e126423..611869466214a 100644 --- a/mlir/include/mlir/Dialect/SCF/Passes.td +++ b/mlir/include/mlir/Dialect/SCF/Passes.td @@ -11,6 +11,11 @@ include "mlir/Pass/PassBase.td" +def SCFBufferize : FunctionPass<"scf-bufferize"> { + let summary = "Bufferize the scf dialect."; + let constructor = "mlir::createSCFBufferizePass()"; +} + def SCFForLoopSpecialization : FunctionPass<"for-loop-specialization"> { let summary = "Specialize `for` loops for vectorization"; diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h index 222ad6bf5584b..3164d337b4775 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms.h @@ -17,7 +17,11 @@ namespace mlir { +class ConversionTarget; +class MLIRContext; +class OwningRewritePatternList; class Region; +class TypeConverter; namespace scf { @@ -42,6 +46,19 @@ void naivelyFuseParallelOps(Region ®ion); /// The old loop is replaced with the new one. void tileParallelLoop(ParallelOp op, llvm::ArrayRef tileSizes); +/// Populates patterns for SCF structural type conversions and sets up the +/// provided ConversionTarget with the appropriate legality configuration for +/// the ops to get converted properly. +/// +/// A "structural" type conversion is one where the underlying ops are +/// completely agnostic to the actual types involved and simply need to update +/// their types. An example of this is scf.if -- the scf.if op and the +/// corresponding scf.yield ops need to update their types accordingly to the +/// TypeConverter, but otherwise don't care what type conversions are happening. 
+void populateSCFStructuralTypeConversionsAndLegality( + MLIRContext *context, TypeConverter &typeConverter, + OwningRewritePatternList &patterns, ConversionTarget &target); + } // namespace scf } // namespace mlir diff --git a/mlir/include/mlir/Transforms/Bufferize.h b/mlir/include/mlir/Transforms/Bufferize.h index ddc00893cc472..5bee53ef01ce6 100644 --- a/mlir/include/mlir/Transforms/Bufferize.h +++ b/mlir/include/mlir/Transforms/Bufferize.h @@ -143,6 +143,15 @@ class BufferizeTypeConverter : public TypeConverter { SmallVector decomposeTypeConversions; }; +/// Marks ops used by bufferization for type conversion materializations as +/// "legal" in the given ConversionTarget. +/// +/// This function should be called by all bufferization passes using +/// BufferizeTypeConverter so that materializations work properly. One exception +/// is bufferization passes doing "full" conversions, where it can be desirable +/// for even the materializations to remain illegal so that they are eliminated. +void populateBufferizeMaterializationLegality(ConversionTarget &target); + /// Helper conversion pattern that encapsulates a BufferizeTypeConverter /// instance. template diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp new file mode 100644 index 0000000000000..23cf72f6ed2a0 --- /dev/null +++ b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp @@ -0,0 +1,41 @@ +//===- Bufferize.cpp - scf bufferize pass ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/Bufferize.h" +#include "PassDetail.h" +#include "mlir/Dialect/SCF/Passes.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/SCF/Transforms.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; +using namespace mlir::scf; + +namespace { +struct SCFBufferizePass : public SCFBufferizeBase { + void runOnFunction() override { + auto func = getOperation(); + auto *context = &getContext(); + + BufferizeTypeConverter typeConverter; + OwningRewritePatternList patterns; + ConversionTarget target(*context); + + populateBufferizeMaterializationLegality(target); + populateSCFStructuralTypeConversionsAndLegality(context, typeConverter, + patterns, target); + if (failed(applyPartialConversion(func, target, patterns))) + return signalPassFailure(); + }; +}; +} // end anonymous namespace + +std::unique_ptr mlir::createSCFBufferizePass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt index b3b20027896e1..6b516debac4a0 100644 --- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt @@ -1,7 +1,9 @@ add_mlir_dialect_library(MLIRSCFTransforms + Bufferize.cpp LoopSpecialization.cpp ParallelLoopFusion.cpp ParallelLoopTiling.cpp + StructuralTypeConversions.cpp Utils.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp new file mode 100644 index 0000000000000..30a2272f39a24 --- /dev/null +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -0,0 +1,117 @@ +//===- StructuralTypeConversions.cpp - scf structural type conversions ----===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/SCF/Passes.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/SCF/Transforms.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; +using namespace mlir::scf; + +namespace { +class ConvertForOpTypes : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(ForOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + SmallVector newResultTypes; + for (auto type : op.getResultTypes()) { + Type newType = typeConverter->convertType(type); + if (!newType) + return rewriter.notifyMatchFailure(op, "not a 1:1 type conversion"); + newResultTypes.push_back(newType); + } + + // Clone and replace. + ForOp newOp = cast(rewriter.clone(*op.getOperation())); + newOp.getOperation()->setOperands(operands); + for (auto t : llvm::zip(newOp.getResults(), newResultTypes)) + std::get<0>(t).setType(std::get<1>(t)); + auto bodyArgs = newOp.getBody()->getArguments(); + for (auto t : llvm::zip(llvm::drop_begin(bodyArgs, 1), newResultTypes)) + std::get<0>(t).setType(std::get<1>(t)); + rewriter.replaceOp(op, newOp.getResults()); + + return success(); + } +}; +} // namespace + +namespace { +class ConvertIfOpTypes : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(IfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + // TODO: Generalize this to any type conversion, not just 1:1. 
+ // + // We need to implement something more sophisticated here that tracks which + // types convert to which other types and does the appropriate + // materialization logic. + // For example, it's possible that one result type converts to 0 types and + // another to 2 types, so newResultTypes would at least be the right size to + // not crash in the llvm::zip call below, but then we would set the + // wrong type on the SSA values! These edge cases are also why we cannot + // safely use the TypeConverter::convertTypes helper here. + SmallVector newResultTypes; + for (auto type : op.getResultTypes()) { + Type newType = typeConverter->convertType(type); + if (!newType) + return rewriter.notifyMatchFailure(op, "not a 1:1 type conversion"); + newResultTypes.push_back(newType); + } + + // TODO: Write this with updateRootInPlace once the conversion infra + // supports source materializations on ops updated in place. + IfOp newOp = cast(rewriter.clone(*op.getOperation())); + newOp.getOperation()->setOperands(operands); + for (auto t : llvm::zip(newOp.getResults(), newResultTypes)) + std::get<0>(t).setType(std::get<1>(t)); + rewriter.replaceOp(op, newOp.getResults()); + return success(); + } +}; +} // namespace + +namespace { +// When the result types of a ForOp/IfOp get changed, the operand types of the +// corresponding yield op need to be changed. In order to trigger the +// appropriate type conversions / materializations, we need a dummy pattern.
+class ConvertYieldOpTypes : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(scf::YieldOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, operands); + return success(); + } +}; +} // namespace + +void mlir::scf::populateSCFStructuralTypeConversionsAndLegality( + MLIRContext *context, TypeConverter &typeConverter, + OwningRewritePatternList &patterns, ConversionTarget &target) { + patterns.insert( + typeConverter, context); + target.addDynamicallyLegalOp([&](Operation *op) { + return typeConverter.isLegal(op->getResultTypes()); + }); + target.addDynamicallyLegalOp([&](scf::YieldOp op) { + // We only have conversions for a subset of ops that use scf.yield + // terminators. + if (!isa(op.getParentOp())) + return true; + return typeConverter.isLegal(op.getOperandTypes()); + }); +} diff --git a/mlir/lib/Transforms/Bufferize.cpp b/mlir/lib/Transforms/Bufferize.cpp index 682fd9ff6719f..26eabe2b89473 100644 --- a/mlir/lib/Transforms/Bufferize.cpp +++ b/mlir/lib/Transforms/Bufferize.cpp @@ -72,6 +72,10 @@ BufferizeTypeConverter::getResultConversionKind(Type origin, Type converted) { return KeepAsFunctionResult; } +void mlir::populateBufferizeMaterializationLegality(ConversionTarget &target) { + target.addLegalOp(); +}; + //===----------------------------------------------------------------------===// // BufferizeFuncOpConverter //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir new file mode 100644 index 0000000000000..01b353da83ed8 --- /dev/null +++ b/mlir/test/Dialect/SCF/bufferize.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-opt %s -scf-bufferize | FileCheck %s + +// CHECK-LABEL: func @if( +// CHECK-SAME: %[[PRED:.*]]: i1, +// CHECK-SAME: %[[TRUE_TENSOR:.*]]: tensor, +// CHECK-SAME: %[[FALSE_TENSOR:.*]]: tensor) -> tensor { 
+// CHECK: %[[RESULT_MEMREF:.*]] = scf.if %[[PRED]] -> (memref) { +// CHECK: %[[TRUE_MEMREF:.*]] = tensor_to_memref %[[TRUE_TENSOR]] : memref +// CHECK: scf.yield %[[TRUE_MEMREF]] : memref +// CHECK: } else { +// CHECK: %[[FALSE_MEMREF:.*]] = tensor_to_memref %[[FALSE_TENSOR]] : memref +// CHECK: scf.yield %[[FALSE_MEMREF]] : memref +// CHECK: } +// CHECK: %[[RESULT_TENSOR:.*]] = tensor_load %[[RESULT_MEMREF:.*]] : memref +// CHECK: return %[[RESULT_TENSOR]] : tensor +// CHECK: } +func @if(%pred: i1, %true_val: tensor, %false_val: tensor) -> tensor { + %0 = scf.if %pred -> (tensor) { + scf.yield %true_val : tensor + } else { + scf.yield %false_val : tensor + } + return %0 : tensor +} + +// CHECK-LABEL: func @for( +// CHECK-SAME: %[[TENSOR:.*]]: tensor, +// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, +// CHECK-SAME: %[[STEP:.*]]: index) -> tensor { +// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref) { +// CHECK: scf.yield %[[ITER]] : memref +// CHECK: } +// CHECK: %[[VAL_8:.*]] = tensor_load %[[VAL_9:.*]] : memref +// CHECK: return %[[VAL_8]] : tensor +// CHECK: } +func @for(%arg0: tensor, %lb: index, %ub: index, %step: index) -> tensor { + %ret = scf.for %iv = %lb to %ub step %step iter_args(%iter = %arg0) -> tensor { + scf.yield %iter : tensor + } + return %ret : tensor +} From 57b338c08a4942bda6e58c77870c657c53b6fb5b Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Mon, 19 Oct 2020 15:59:03 -0700 Subject: [PATCH 082/179] [mlir][shape] Split out structural type conversions for shape dialect. A "structural" type conversion is one where the underlying ops are completely agnostic to the actual types involved and simply need to update their types. 
An example of this is shape.assuming -- the shape.assuming op and the corresponding shape.assuming_yield op need to update their types accordingly to the TypeConverter, but otherwise don't care what type conversions are happening. Also, the previous conversion code would not correctly materialize conversions for the shape.assuming_yield op. This should have caused a verification failure, but shape.assuming's verifier wasn't calling RegionBranchOpInterface::verifyTypes (which for reasons can't be called automatically as part of the trait verification, and requires being called manually). This patch also adds that verification. Differential Revision: https://reviews.llvm.org/D89833 --- .../include/mlir/Dialect/Shape/IR/ShapeOps.td | 1 + .../mlir/Dialect/Shape/Transforms/Passes.h | 21 ++++-- .../Dialect/Shape/Transforms/Bufferize.cpp | 62 ++-------------- .../Dialect/Shape/Transforms/CMakeLists.txt | 1 + .../Transforms/StructuralTypeConversions.cpp | 71 +++++++++++++++++++ mlir/test/Dialect/Shape/bufferize.mlir | 18 +++-- 6 files changed, 108 insertions(+), 66 deletions(-) create mode 100644 mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index c6c52f2eb6ee0..6541cfadfc1b6 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -635,6 +635,7 @@ def Shape_AssumingOp : Shape_Op<"assuming", let printer = [{ return ::print(p, *this); }]; let parser = [{ return ::parse$cppClass(parser, result); }]; + let verifier = [{ return RegionBranchOpInterface::verifyTypes(*this); }]; let extraClassDeclaration = [{ // Inline the region into the region containing the AssumingOp and delete diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h index f8976e9c75ebb..6df12998566ab 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h 
+++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h @@ -17,7 +17,8 @@ #include "mlir/Pass/Pass.h" namespace mlir { -class BufferizeTypeConverter; +class ConversionTarget; +class TypeConverter; } // namespace mlir namespace mlir { @@ -40,9 +41,21 @@ void populateRemoveShapeConstraintsPatterns(OwningRewritePatternList &patterns, MLIRContext *ctx); std::unique_ptr createRemoveShapeConstraintsPass(); -void populateShapeTypeConversionPatterns(MLIRContext *ctx, - BufferizeTypeConverter &converter, - OwningRewritePatternList &patterns); +/// Populates patterns for shape dialect structural type conversions and sets up +/// the provided ConversionTarget with the appropriate legality configuration +/// for the ops to get converted properly. +/// +/// A "structural" type conversion is one where the underlying ops are +/// completely agnostic to the actual types involved and simply need to update +/// their types consistently. An example of this is shape.assuming -- the +/// shape.assuming op and the corresponding shape.assuming_yield op need to have +/// consistent types, but the exact types don't matter. So all that we need to +/// do for a structural type conversion is to update both of their types +/// consistently to the new types prescribed by the TypeConverter. +void populateShapeStructuralTypeConversionsAndLegality( + MLIRContext *context, TypeConverter &typeConverter, + OwningRewritePatternList &patterns, ConversionTarget &target); + // Bufferizes shape dialect ops. 
// // Note that most shape dialect ops must be converted to std before diff --git a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp index bdebfa9a32d7b..20cd960e040fe 100644 --- a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp @@ -8,82 +8,30 @@ #include "mlir/Transforms/Bufferize.h" #include "PassDetail.h" -#include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/Shape/Transforms/Passes.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/Operation.h" -#include "mlir/IR/StandardTypes.h" #include "mlir/Pass/Pass.h" using namespace mlir; -using namespace mlir::shape; namespace { -// Propagate tensor to memref conversions through shape.assuming ops. -class TypeConversionAssumingOpConverter - : public BufferizeOpConversionPattern { -public: - using BufferizeOpConversionPattern< - shape::AssumingOp>::BufferizeOpConversionPattern; - - LogicalResult - matchAndRewrite(shape::AssumingOp assumingOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - SmallVector newResultTypes; - newResultTypes.reserve(assumingOp.getNumResults()); - for (auto result : assumingOp.getResults()) { - auto originalType = result.getType(); - Type convertedType = converter.convertType(originalType); - newResultTypes.push_back(convertedType); - } - - auto newAssumingOp = rewriter.create( - assumingOp.getLoc(), newResultTypes, assumingOp.witness()); - - rewriter.replaceOp(assumingOp, newAssumingOp.getResults()); - rewriter.inlineRegionBefore(assumingOp.doRegion(), newAssumingOp.doRegion(), - newAssumingOp.doRegion().end()); - - return success(); - } -}; - struct ShapeBufferizePass : public ShapeBufferizeBase { void runOnFunction() override { MLIRContext &ctx = getContext(); OwningRewritePatternList patterns; - BufferizeTypeConverter converter; - populateShapeTypeConversionPatterns(&ctx, converter, patterns); - + BufferizeTypeConverter typeConverter; 
ConversionTarget target(getContext()); - auto isMemRefType = [](Type type) { return type.isa(); }; - target.addDynamicallyLegalOp([&](shape::AssumingOp op) { - return std::all_of(op.result_type_begin(), op.result_type_end(), - isMemRefType); - }); + populateBufferizeMaterializationLegality(target); + populateShapeStructuralTypeConversionsAndLegality(&ctx, typeConverter, + patterns, target); - if (failed(mlir::applyPartialConversion(getFunction(), target, patterns))) + if (failed(applyPartialConversion(getFunction(), target, patterns))) signalPassFailure(); } }; - } // namespace -/// Populates `patterns` with the conversion patterns of tensor->memref. -// -// TODO: Change this to work generally with any type conversions. -void mlir::populateShapeTypeConversionPatterns( - MLIRContext *context, BufferizeTypeConverter &converter, - OwningRewritePatternList &patterns) { - patterns.insert(context, converter); -} - -//===----------------------------------------------------------------------===// -// ShapeBufferizePass construction -//===----------------------------------------------------------------------===// - std::unique_ptr mlir::createShapeBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt index ce413f57d989e..123a3664df894 100644 --- a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt @@ -2,6 +2,7 @@ add_mlir_dialect_library(MLIRShapeOpsTransforms Bufferize.cpp RemoveShapeConstraints.cpp ShapeToShapeLowering.cpp + StructuralTypeConversions.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/ShapeOps/Transforms diff --git a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp new file mode 100644 index 0000000000000..61e862836a733 --- /dev/null +++ b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp @@ 
-0,0 +1,71 @@ +//===- StructuralTypeConversions.cpp - Shape structural type conversions --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/Shape/Transforms/Passes.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; +using namespace mlir::shape; + +namespace { +class ConvertAssumingOpTypes : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(AssumingOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + SmallVector newResultTypes; + newResultTypes.reserve(op.getNumResults()); + for (auto result : op.getResults()) { + auto originalType = result.getType(); + Type convertedType = getTypeConverter()->convertType(originalType); + newResultTypes.push_back(convertedType); + } + + auto newAssumingOp = + rewriter.create(op.getLoc(), newResultTypes, op.witness()); + + rewriter.replaceOp(op, newAssumingOp.getResults()); + rewriter.inlineRegionBefore(op.doRegion(), newAssumingOp.doRegion(), + newAssumingOp.doRegion().end()); + + return success(); + } +}; +} // namespace + +namespace { +class ConvertAssumingYieldOpTypes + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(AssumingYieldOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + rewriter.replaceOpWithNewOp(op, operands); + return success(); + } +}; +} // namespace + +void mlir::populateShapeStructuralTypeConversionsAndLegality( + MLIRContext *context, TypeConverter &typeConverter, + OwningRewritePatternList &patterns, ConversionTarget &target) { + 
patterns.insert( + typeConverter, context); + target.addDynamicallyLegalOp([&](AssumingOp op) { + return typeConverter.isLegal(op.getResultTypes()); + }); + target.addDynamicallyLegalOp([&](AssumingYieldOp op) { + return typeConverter.isLegal(op.getOperandTypes()); + }); +} diff --git a/mlir/test/Dialect/Shape/bufferize.mlir b/mlir/test/Dialect/Shape/bufferize.mlir index 7393de1014666..cb65a5d42d4b3 100644 --- a/mlir/test/Dialect/Shape/bufferize.mlir +++ b/mlir/test/Dialect/Shape/bufferize.mlir @@ -1,12 +1,20 @@ // RUN: mlir-opt -split-input-file -shape-bufferize <%s | FileCheck %s // ----- -// Check that shape.assuming returns a memref. -// -// CHECK-LABEL: @shape_assuming_returns_memref -func @shape_assuming_returns_memref() { + +// CHECK-LABEL: func @shape_assuming() { +// CHECK: %[[WTRUE:.*]] = shape.const_witness true +// CHECK: %[[MEMREF:.*]] = shape.assuming %[[WTRUE]] -> (memref<2xf16>) { +// CHECK: %[[TENSOR_VAL:.*]] = "test.source"() : () -> tensor<2xf16> +// CHECK: %[[YIELDED_MEMREF:.*]] = tensor_to_memref %[[TENSOR_VAL]] : memref<2xf16> +// CHECK: shape.assuming_yield %[[YIELDED_MEMREF]] : memref<2xf16> +// CHECK: } +// CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF:.*]] : memref<2xf16> +// CHECK: "test.sink"(%[[TENSOR]]) : (tensor<2xf16>) -> () +// CHECK: return +// CHECK: } +func @shape_assuming() { %0 = shape.const_witness true - // CHECK: shape.assuming %{{.*}} -> (memref<2xf16>) { %1 = shape.assuming %0 -> (tensor<2xf16>) { %2 = "test.source"() : () -> (tensor<2xf16>) shape.assuming_yield %2 : tensor<2xf16> From 74a58ec9c27f48eb26094667156934c6ca9d0012 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Tue, 20 Oct 2020 23:20:04 -0700 Subject: [PATCH 083/179] [mlir][CAPI][Python] Plumb OpPrintingFlags to C and Python APIs. * Adds a new MlirOpPrintingFlags type and supporting accessors. * Adds a new mlirOperationPrintWithFlags function. 
* Adds a full featured python Operation.print method with all options and the ability to print directly to files/stdout in text or binary. * Adds an Operation.get_asm which delegates to print and returns a str or bytes. * Reworks Operation.__str__ to be based on get_asm. Differential Revision: https://reviews.llvm.org/D89848 --- mlir/include/mlir-c/IR.h | 42 +++++++ mlir/include/mlir/CAPI/IR.h | 1 + mlir/include/mlir/IR/OperationSupport.h | 8 +- mlir/lib/Bindings/Python/IRModules.cpp | 141 ++++++++++++++++++++-- mlir/lib/Bindings/Python/IRModules.h | 9 ++ mlir/lib/CAPI/IR/IR.cpp | 37 ++++++ mlir/test/Bindings/Python/ir_operation.py | 42 +++++++ mlir/test/CAPI/ir.c | 22 +++- 8 files changed, 287 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index 2aeb306f72567..a08fe77da37cd 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -50,6 +50,7 @@ extern "C" { DEFINE_C_API_STRUCT(MlirContext, void); DEFINE_C_API_STRUCT(MlirDialect, void); DEFINE_C_API_STRUCT(MlirOperation, void); +DEFINE_C_API_STRUCT(MlirOpPrintingFlags, void); DEFINE_C_API_STRUCT(MlirBlock, void); DEFINE_C_API_STRUCT(MlirRegion, void); @@ -228,6 +229,42 @@ void mlirOperationStateAddSuccessors(MlirOperationState *state, intptr_t n, void mlirOperationStateAddAttributes(MlirOperationState *state, intptr_t n, MlirNamedAttribute *attributes); +/*============================================================================*/ +/* Op Printing flags API. */ +/* While many of these are simple settings that could be represented in a */ +/* struct, they are wrapped in a heap allocated object and accessed via */ +/* functions to maximize the possibility of compatibility over time. */ +/*============================================================================*/ + +/** Creates new printing flags with defaults, intended for customization. + * Must be freed with a call to mlirOpPrintingFlagsDestroy(). 
*/ +MlirOpPrintingFlags mlirOpPrintingFlagsCreate(); + +/** Destroys printing flags created with mlirOpPrintingFlagsCreate. */ +void mlirOpPrintingFlagsDestroy(MlirOpPrintingFlags flags); + +/** Enables the elision of large elements attributes by printing a lexically + * valid but otherwise meaningless form instead of the element data. The + * `largeElementLimit` is used to configure what is considered to be a "large" + * ElementsAttr by providing an upper limit to the number of elements. */ +void mlirOpPrintingFlagsElideLargeElementsAttrs(MlirOpPrintingFlags flags, + intptr_t largeElementLimit); + +/** Enable printing of debug information. If 'prettyForm' is set to true, + * debug information is printed in a more readable 'pretty' form. Note: The + * IR generated with 'prettyForm' is not parsable. */ +void mlirOpPrintingFlagsEnableDebugInfo(MlirOpPrintingFlags flags, + int prettyForm); + +/** Always print operations in the generic form. */ +void mlirOpPrintingFlagsPrintGenericOpForm(MlirOpPrintingFlags flags); + +/** Use local scope when printing the operation. This allows for using the + * printer in a more localized and thread-safe setting, but may not + * necessarily be identical to what the IR will look like when dumping + * the full module. */ +void mlirOpPrintingFlagsUseLocalScope(MlirOpPrintingFlags flags); + /*============================================================================*/ /* Operation API. */ /*============================================================================*/ @@ -298,6 +335,11 @@ int mlirOperationRemoveAttributeByName(MlirOperation op, const char *name); void mlirOperationPrint(MlirOperation op, MlirStringCallback callback, void *userData); +/** Same as mlirOperationPrint but accepts flags controlling the printing + * behavior. */ +void mlirOperationPrintWithFlags(MlirOperation op, MlirOpPrintingFlags flags, + MlirStringCallback callback, void *userData); + /** Prints an operation to stderr. 
*/ void mlirOperationDump(MlirOperation op); diff --git a/mlir/include/mlir/CAPI/IR.h b/mlir/include/mlir/CAPI/IR.h index dce293d05588d..b3e481dfb6655 100644 --- a/mlir/include/mlir/CAPI/IR.h +++ b/mlir/include/mlir/CAPI/IR.h @@ -24,6 +24,7 @@ DEFINE_C_API_PTR_METHODS(MlirContext, mlir::MLIRContext) DEFINE_C_API_PTR_METHODS(MlirDialect, mlir::Dialect) DEFINE_C_API_PTR_METHODS(MlirOperation, mlir::Operation) DEFINE_C_API_PTR_METHODS(MlirBlock, mlir::Block) +DEFINE_C_API_PTR_METHODS(MlirOpPrintingFlags, mlir::OpPrintingFlags); DEFINE_C_API_PTR_METHODS(MlirRegion, mlir::Region) DEFINE_C_API_METHODS(MlirAttribute, mlir::Attribute) diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index e1e34f8da6c69..c745c1dedea3b 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -562,10 +562,10 @@ class OpPrintingFlags { OpPrintingFlags(); OpPrintingFlags(llvm::NoneType) : OpPrintingFlags() {} - /// Enable the elision of large elements attributes, by printing a '...' - /// instead of the element data. Note: The IR generated with this option is - /// not parsable. `largeElementLimit` is used to configure what is considered - /// to be a "large" ElementsAttr by providing an upper limit to the number of + /// Enables the elision of large elements attributes by printing a lexically + /// valid but otherwise meaningless form instead of the element data. The + /// `largeElementLimit` is used to configure what is considered to be a + /// "large" ElementsAttr by providing an upper limit to the number of /// elements. 
OpPrintingFlags &elideLargeElementsAttrs(int64_t largeElementLimit = 16); diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index db8a220c9d31d..014b312971b7d 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -64,12 +64,44 @@ static const char kContextGetUnknownLocationDocstring[] = static const char kContextGetFileLocationDocstring[] = R"(Gets a Location representing a file, line and column)"; +static const char kOperationPrintDocstring[] = + R"(Prints the assembly form of the operation to a file like object. + +Args: + file: The file like object to write to. Defaults to sys.stdout. + binary: Whether to write bytes (True) or str (False). Defaults to False. + large_elements_limit: Whether to elide elements attributes above this + number of elements. Defaults to None (no limit). + enable_debug_info: Whether to print debug/location information. Defaults + to False. + pretty_debug_info: Whether to format debug information for easier reading + by a human (warning: the result is unparseable). + print_generic_op_form: Whether to print the generic assembly forms of all + ops. Defaults to False. + use_local_Scope: Whether to print in a way that is more optimized for + multi-threaded access but may not be consistent with how the overall + module prints. +)"; + +static const char kOperationGetAsmDocstring[] = + R"(Gets the assembly form of the operation with all options available. + +Args: + binary: Whether to return a bytes (True) or str (False) object. Defaults to + False. + ... others ...: See the print() method for common keyword arguments for + configuring the printout. +Returns: + Either a bytes or str object, depending on the setting of the 'binary' + argument. +)"; + static const char kOperationStrDunderDocstring[] = - R"(Prints the assembly form of the operation with default options. + R"(Gets the assembly form of the operation with default options. 
If more advanced control over the assembly formatting or I/O options is needed, -use the dedicated print method, which supports keyword arguments to customize -behavior. +use the dedicated print or get_asm method, which supports keyword arguments to +customize behavior. )"; static const char kDumpDocstring[] = @@ -118,6 +150,35 @@ struct PyPrintAccumulator { } }; +/// Accumulates int a python file-like object, either writing text (default) +/// or binary. +class PyFileAccumulator { +public: + PyFileAccumulator(py::object fileObject, bool binary) + : pyWriteFunction(fileObject.attr("write")), binary(binary) {} + + void *getUserData() { return this; } + + MlirStringCallback getCallback() { + return [](const char *part, intptr_t size, void *userData) { + py::gil_scoped_acquire(); + PyFileAccumulator *accum = static_cast(userData); + if (accum->binary) { + // Note: Still has to copy and not avoidable with this API. + py::bytes pyBytes(part, size); + accum->pyWriteFunction(pyBytes); + } else { + py::str pyStr(part, size); // Decodes as UTF-8 by default. + accum->pyWriteFunction(pyStr); + } + }; + } + +private: + py::object pyWriteFunction; + bool binary; +}; + /// Accumulates into a python string from a method that is expected to make /// one (no more, no less) call to the callback (asserts internally on /// violation). 
@@ -712,6 +773,48 @@ void PyOperation::checkValid() { } } +void PyOperation::print(py::object fileObject, bool binary, + llvm::Optional largeElementsLimit, + bool enableDebugInfo, bool prettyDebugInfo, + bool printGenericOpForm, bool useLocalScope) { + checkValid(); + if (fileObject.is_none()) + fileObject = py::module::import("sys").attr("stdout"); + MlirOpPrintingFlags flags = mlirOpPrintingFlagsCreate(); + if (largeElementsLimit) + mlirOpPrintingFlagsElideLargeElementsAttrs(flags, *largeElementsLimit); + if (enableDebugInfo) + mlirOpPrintingFlagsEnableDebugInfo(flags, /*prettyForm=*/prettyDebugInfo); + if (printGenericOpForm) + mlirOpPrintingFlagsPrintGenericOpForm(flags); + + PyFileAccumulator accum(fileObject, binary); + py::gil_scoped_release(); + mlirOperationPrintWithFlags(get(), flags, accum.getCallback(), + accum.getUserData()); + mlirOpPrintingFlagsDestroy(flags); +} + +py::object PyOperation::getAsm(bool binary, + llvm::Optional largeElementsLimit, + bool enableDebugInfo, bool prettyDebugInfo, + bool printGenericOpForm, bool useLocalScope) { + py::object fileObject; + if (binary) { + fileObject = py::module::import("io").attr("BytesIO")(); + } else { + fileObject = py::module::import("io").attr("StringIO")(); + } + print(fileObject, /*binary=*/binary, + /*largeElementsLimit=*/largeElementsLimit, + /*enableDebugInfo=*/enableDebugInfo, + /*prettyDebugInfo=*/prettyDebugInfo, + /*printGenericOpForm=*/printGenericOpForm, + /*useLocalScope=*/useLocalScope); + + return fileObject.attr("getvalue")(); +} + //------------------------------------------------------------------------------ // PyAttribute. //------------------------------------------------------------------------------ @@ -745,7 +848,8 @@ namespace { /// CRTP base class for Python MLIR values that subclass Value and should be /// castable from it. The value hierarchy is one level deep and is not supposed /// to accommodate other levels unless core MLIR changes. 
-template class PyConcreteValue : public PyValue { +template +class PyConcreteValue : public PyValue { public: // Derived classes must define statics for: // IsAFunctionTy isaFunction @@ -1969,13 +2073,30 @@ void mlir::python::populateIRSubmodule(py::module &m) { .def( "__str__", [](PyOperation &self) { - self.checkValid(); - PyPrintAccumulator printAccum; - mlirOperationPrint(self.get(), printAccum.getCallback(), - printAccum.getUserData()); - return printAccum.join(); + return self.getAsm(/*binary=*/false, + /*largeElementsLimit=*/llvm::None, + /*enableDebugInfo=*/false, + /*prettyDebugInfo=*/false, + /*printGenericOpForm=*/false, + /*useLocalScope=*/false); }, - "Returns the assembly form of the operation."); + "Returns the assembly form of the operation.") + .def("print", &PyOperation::print, + // Careful: Lots of arguments must match up with print method. + py::arg("file") = py::none(), py::arg("binary") = false, + py::arg("large_elements_limit") = py::none(), + py::arg("enable_debug_info") = false, + py::arg("pretty_debug_info") = false, + py::arg("print_generic_op_form") = false, + py::arg("use_local_scope") = false, kOperationPrintDocstring) + .def("get_asm", &PyOperation::getAsm, + // Careful: Lots of arguments must match up with get_asm method. + py::arg("binary") = false, + py::arg("large_elements_limit") = py::none(), + py::arg("enable_debug_info") = false, + py::arg("pretty_debug_info") = false, + py::arg("print_generic_op_form") = false, + py::arg("use_local_scope") = false, kOperationGetAsmDocstring); // Mapping of PyRegion. py::class_(m, "Region") diff --git a/mlir/lib/Bindings/Python/IRModules.h b/mlir/lib/Bindings/Python/IRModules.h index 947b7343e35a3..b438e8ac408df 100644 --- a/mlir/lib/Bindings/Python/IRModules.h +++ b/mlir/lib/Bindings/Python/IRModules.h @@ -277,6 +277,15 @@ class PyOperation : public BaseContextObject { } void checkValid(); + /// Implements the bound 'print' method and helps with others. 
+ void print(pybind11::object fileObject, bool binary, + llvm::Optional largeElementsLimit, bool enableDebugInfo, + bool prettyDebugInfo, bool printGenericOpForm, bool useLocalScope); + pybind11::object getAsm(bool binary, + llvm::Optional largeElementsLimit, + bool enableDebugInfo, bool prettyDebugInfo, + bool printGenericOpForm, bool useLocalScope); + private: PyOperation(PyMlirContextRef contextRef, MlirOperation operation); static PyOperationRef createInstance(PyMlirContextRef contextRef, diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp index 104f6fda5c020..379770c8962f8 100644 --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -74,6 +74,36 @@ MlirStringRef mlirDialectGetNamespace(MlirDialect dialect) { return wrap(unwrap(dialect)->getNamespace()); } +/* ========================================================================== */ +/* Printing flags API. */ +/* ========================================================================== */ + +MlirOpPrintingFlags mlirOpPrintingFlagsCreate() { + return wrap(new OpPrintingFlags()); +} + +void mlirOpPrintingFlagsDestroy(MlirOpPrintingFlags flags) { + delete unwrap(flags); +} + +void mlirOpPrintingFlagsElideLargeElementsAttrs(MlirOpPrintingFlags flags, + intptr_t largeElementLimit) { + unwrap(flags)->elideLargeElementsAttrs(largeElementLimit); +} + +void mlirOpPrintingFlagsEnableDebugInfo(MlirOpPrintingFlags flags, + int prettyForm) { + unwrap(flags)->enableDebugInfo(/*prettyForm=*/prettyForm); +} + +void mlirOpPrintingFlagsPrintGenericOpForm(MlirOpPrintingFlags flags) { + unwrap(flags)->printGenericOpForm(); +} + +void mlirOpPrintingFlagsUseLocalScope(MlirOpPrintingFlags flags) { + unwrap(flags)->useLocalScope(); +} + /* ========================================================================== */ /* Location API. 
*/ /* ========================================================================== */ @@ -282,6 +312,13 @@ void mlirOperationPrint(MlirOperation op, MlirStringCallback callback, stream.flush(); } +void mlirOperationPrintWithFlags(MlirOperation op, MlirOpPrintingFlags flags, + MlirStringCallback callback, void *userData) { + detail::CallbackOstream stream(callback, userData); + unwrap(op)->print(stream, *unwrap(flags)); + stream.flush(); +} + void mlirOperationDump(MlirOperation op) { return unwrap(op)->dump(); } /* ========================================================================== */ diff --git a/mlir/test/Bindings/Python/ir_operation.py b/mlir/test/Bindings/Python/ir_operation.py index e4dc71ac26efb..84f303ca570b0 100644 --- a/mlir/test/Bindings/Python/ir_operation.py +++ b/mlir/test/Bindings/Python/ir_operation.py @@ -1,6 +1,7 @@ # RUN: %PYTHON %s | FileCheck %s import gc +import io import itertools import mlir @@ -248,3 +249,44 @@ def testOperationResultList(): run(testOperationResultList) + + +# CHECK-LABEL: TEST: testOperationPrint +def testOperationPrint(): + ctx = mlir.ir.Context() + module = ctx.parse_module(r""" + func @f1(%arg0: i32) -> i32 { + %0 = constant dense<[1, 2, 3, 4]> : tensor<4xi32> + return %arg0 : i32 + } + """) + + # Test print to stdout. + # CHECK: return %arg0 : i32 + module.operation.print() + + # Test print to text file. + f = io.StringIO() + # CHECK: + # CHECK: return %arg0 : i32 + module.operation.print(file=f) + str_value = f.getvalue() + print(str_value.__class__) + print(f.getvalue()) + + # Test print to binary file. + f = io.BytesIO() + # CHECK: + # CHECK: return %arg0 : i32 + module.operation.print(file=f, binary=True) + bytes_value = f.getvalue() + print(bytes_value.__class__) + print(bytes_value) + + # Test get_asm with options. 
+ # CHECK: value = opaque<"", "0xDEADBEEF"> : tensor<4xi32> + # CHECK: "std.return"(%arg0) : (i32) -> () -:4:7 + module.operation.print(large_elements_limit=2, enable_debug_info=True, + pretty_debug_info=True, print_generic_op_form=True, use_local_scope=True) + +run(testOperationPrint) diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index 7c86f403b3391..fa9a6258a4720 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -10,9 +10,9 @@ /* RUN: mlir-capi-ir-test 2>&1 | FileCheck %s */ +#include "mlir-c/IR.h" #include "mlir-c/AffineMap.h" #include "mlir-c/Diagnostics.h" -#include "mlir-c/IR.h" #include "mlir-c/Registration.h" #include "mlir-c/StandardAttributes.h" #include "mlir-c/StandardDialect.h" @@ -319,6 +319,25 @@ static void printFirstOfEach(MlirContext ctx, MlirOperation operation) { fprintf(stderr, "Removed attr is null: %d\n", mlirAttributeIsNull( mlirOperationGetAttributeByName(operation, "custom_attr"))); + + // Add a large attribute to verify printing flags. + int64_t eltsShape[] = {4}; + int32_t eltsData[] = {1, 2, 3, 4}; + mlirOperationSetAttributeByName( + operation, "elts", + mlirDenseElementsAttrInt32Get( + mlirRankedTensorTypeGet(1, eltsShape, mlirIntegerTypeGet(ctx, 32)), 4, + eltsData)); + MlirOpPrintingFlags flags = mlirOpPrintingFlagsCreate(); + mlirOpPrintingFlagsElideLargeElementsAttrs(flags, 2); + mlirOpPrintingFlagsPrintGenericOpForm(flags); + mlirOpPrintingFlagsEnableDebugInfo(flags, /*prettyForm=*/0); + mlirOpPrintingFlagsUseLocalScope(flags); + fprintf(stderr, "Op print with all flags: "); + mlirOperationPrintWithFlags(operation, flags, printToStderr, NULL); + fprintf(stderr, "\n"); + + mlirOpPrintingFlagsDestroy(flags); } /// Creates an operation with a region containing multiple blocks with @@ -991,6 +1010,7 @@ int main() { // CHECK: Remove attr: 1 // CHECK: Remove attr again: 0 // CHECK: Removed attr is null: 1 + // CHECK: Op print with all flags: %{{.*}} = "std.constant"() {elts = opaque<"", "0xDEADBEEF"> : 
tensor<4xi32>, value = 0 : index} : () -> index loc(unknown) // clang-format on mlirModuleDestroy(moduleOp); From b3881d01abcbc519032e0c942f90124f2bdba56a Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 21 Oct 2020 21:19:04 +0200 Subject: [PATCH 084/179] [NFC] Fixes Doxygen copy-paste error. --- clang/lib/Sema/TreeTransform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 9d519616856bb..481fe98132c83 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1305,7 +1305,7 @@ class TreeTransform { return SemaRef.ActOnLabelStmt(IdentLoc, L, ColonLoc, SubStmt); } - /// Build a new label statement. + /// Build a new attributed statement. /// /// By default, performs semantic analysis to build the new statement. /// Subclasses may override this routine to provide different behavior. From e8cce5ad892d21c9b51c656178a198cddb175ac4 Mon Sep 17 00:00:00 2001 From: Artur Pilipenko Date: Thu, 1 Oct 2020 20:01:39 -0700 Subject: [PATCH 085/179] [RS4GC] NFC. Preparatory refactoring to make GC parseable memcpy For GC parseable element atomic memcpy/memmove we'll need to shuffle statepoint arguments. Make it possible by storing the arguments as Value *, not Use *. --- llvm/include/llvm/IR/IRBuilder.h | 4 ++-- llvm/lib/IR/IRBuilder.cpp | 8 ++++---- llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 7 +++++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 8d11643da03a0..e988eecfefbbe 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -814,7 +814,7 @@ class IRBuilderBase { /// start a new statepoint sequence. 
CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags, - ArrayRef CallArgs, + ArrayRef CallArgs, Optional> TransitionArgs, Optional> DeoptArgs, ArrayRef GCArgs, @@ -843,7 +843,7 @@ class IRBuilderBase { InvokeInst *CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, - ArrayRef InvokeArgs, Optional> TransitionArgs, + ArrayRef InvokeArgs, Optional> TransitionArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 982a158ff5c04..c0e4451f52003 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -653,10 +653,10 @@ CallInst *IRBuilderBase::CreateGCStatepointCall( CallInst *IRBuilderBase::CreateGCStatepointCall( uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags, - ArrayRef CallArgs, Optional> TransitionArgs, + ArrayRef CallArgs, Optional> TransitionArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - return CreateGCStatepointCallCommon( + return CreateGCStatepointCallCommon( this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs, Name); } @@ -711,9 +711,9 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, - ArrayRef InvokeArgs, Optional> TransitionArgs, + ArrayRef InvokeArgs, Optional> TransitionArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - return CreateGCStatepointInvokeCommon( + return CreateGCStatepointInvokeCommon( this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags, InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name); } diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp 
b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index c344c6c684776..0769b23f903b2 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1487,7 +1487,9 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ uint32_t NumPatchBytes = 0; uint32_t Flags = uint32_t(StatepointFlags::None); - ArrayRef CallArgs(Call->arg_begin(), Call->arg_end()); + SmallVector CallArgs; + for (Value *Arg : Call->args()) + CallArgs.push_back(Arg); Optional> DeoptArgs; if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt)) DeoptArgs = Bundle->Inputs; @@ -1520,7 +1522,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ Value *CallTarget = Call->getCalledOperand(); if (Function *F = dyn_cast(CallTarget)) { - if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) { + auto IID = F->getIntrinsicID(); + if (IID == Intrinsic::experimental_deoptimize) { // Calls to llvm.experimental.deoptimize are lowered to calls to the // __llvm_deoptimize symbol. We want to resolve this now, since the // verifier does not allow taking the address of an intrinsic function. From aa6c305344e5e96bc12b1b9b88cd852a290de914 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 9 Oct 2020 17:58:27 -0700 Subject: [PATCH 086/179] [LowerMatrixIntrinsics][NewPM] Fix PreservedAnalyses result PreservedCFGCheckerInstrumentation was saying that LowerMatrixIntrinsics didn't properly preserve CFG even though it claimed to. The legacy pass says it doesn't. Match the legacy pass's preserved analyses. 
Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D89175 --- llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 3 ++- llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 053d96430b6b3..3b4ee2d1841d0 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1945,7 +1945,8 @@ PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F, LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE); if (LMT.Visit()) { PreservedAnalyses PA; - PA.preserveSet(); + PA.preserve(); + PA.preserve(); return PA; } return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll index 7300a5ff77030..edc3034c4917f 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll @@ -1,4 +1,5 @@ ; RUN: opt -lower-matrix-intrinsics -fuse-matrix-use-loops=false -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -instcombine -verify-dom-info %s -S | FileCheck %s +; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-use-loops=false -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s ; REQUIRES: aarch64-registered-target From 31bc55d602a09241f6d126ca9315e7ddc3d92555 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Sat, 17 Oct 2020 10:46:19 -0700 Subject: [PATCH 087/179] [sanitizer] Convert PrintModuleMap to DumpProcessMap As discussed in the review for D87120 (specifically at https://reviews.llvm.org/D87120#inline-831939), clean up PrintModuleMap and DumpProcessMap usage differences. 
The former is only implemented for Mac OSX, whereas the latter is implemented for all OSes. The former is called by asan and tsan, and the latter by hwasan and now memprof, under the same option. Simply rename the PrintModuleMap implementation for Mac to DumpProcessMap, remove other empty PrintModuleMap implementations, and convert asan/tsan to new name. The existing posix DumpProcessMap is disabled for SANITIZER_MAC. Differential Revision: https://reviews.llvm.org/D89630 --- compiler-rt/lib/asan/asan_report.cpp | 3 ++- compiler-rt/lib/asan/asan_rtl.cpp | 3 ++- compiler-rt/lib/sanitizer_common/sanitizer_common.h | 1 - compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp | 2 -- compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 2 -- compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp | 2 +- compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp | 2 ++ compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp | 2 -- compiler-rt/lib/sanitizer_common/sanitizer_win.cpp | 2 -- compiler-rt/lib/tsan/rtl/tsan_report.cpp | 3 ++- compiler-rt/lib/tsan/rtl/tsan_rtl.cpp | 3 ++- 11 files changed, 11 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/asan/asan_report.cpp b/compiler-rt/lib/asan/asan_report.cpp index 4b4db1db6dc9c..03f1ed2b01866 100644 --- a/compiler-rt/lib/asan/asan_report.cpp +++ b/compiler-rt/lib/asan/asan_report.cpp @@ -151,7 +151,8 @@ class ScopedInErrorReport { if (common_flags()->print_cmdline) PrintCmdline(); - if (common_flags()->print_module_map == 2) PrintModuleMap(); + if (common_flags()->print_module_map == 2) + DumpProcessMap(); // Copy the message buffer so that we could start logging without holding a // lock that gets aquired during printing. diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp index 115733cdaa48e..7b5a929963c6a 100644 --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -45,7 +45,8 @@ static void AsanDie() { // Don't die twice - run a busy loop. 
while (1) { } } - if (common_flags()->print_module_map >= 1) PrintModuleMap(); + if (common_flags()->print_module_map >= 1) + DumpProcessMap(); if (flags()->sleep_before_dying) { Report("Sleeping for %d second(s)\n", flags()->sleep_before_dying); SleepForSeconds(flags()->sleep_before_dying); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 040db6fc2a16b..bce24d68045b1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -254,7 +254,6 @@ void UpdateProcessName(); void CacheBinaryName(); void DisableCoreDumperIfNecessary(); void DumpProcessMap(); -void PrintModuleMap(); const char *GetEnv(const char *name); bool SetEnv(const char *name, const char *value); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp index 6d1ad79467706..a6034ebda936e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp @@ -105,8 +105,6 @@ void SetAlternateSignalStack() {} void UnsetAlternateSignalStack() {} void InitTlsSize() {} -void PrintModuleMap() {} - bool SignalContext::IsStackOverflow() const { return false; } void SignalContext::DumpAllRegisters(void *context) { UNIMPLEMENTED(); } const char *SignalContext::Describe() const { UNIMPLEMENTED(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 024bef3c05829..0a1bb1771ecab 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -2218,8 +2218,6 @@ void CheckMPROTECT() { #endif } -void PrintModuleMap() { } - void CheckNoDeepBind(const char *filename, int flag) { #ifdef RTLD_DEEPBIND if (flag & RTLD_DEEPBIND) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index b1271120c0041..ab7005c086930 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -1300,7 +1300,7 @@ void FormatUUID(char *out, uptr size, const u8 *uuid) { uuid[12], uuid[13], uuid[14], uuid[15]); } -void PrintModuleMap() { +void DumpProcessMap() { Printf("Process module map:\n"); MemoryMappingLayout memory_mapping(false); InternalMmapVector modules; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp index b8b75c20d9f9a..2e080098283fd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp @@ -239,6 +239,7 @@ bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) { return true; } +#if !SANITIZER_MAC void DumpProcessMap() { MemoryMappingLayout proc_maps(/*cache_enabled*/true); const sptr kBufSize = 4095; @@ -252,6 +253,7 @@ void DumpProcessMap() { Report("End of process memory map.\n"); UnmapOrDie(filename, kBufSize); } +#endif const char *GetPwd() { return GetEnv("PWD"); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp index 29bcfcfa6f158..d58bd08fb1a89 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp @@ -108,8 +108,6 @@ void SetAlternateSignalStack() {} void UnsetAlternateSignalStack() {} void InitTlsSize() {} -void PrintModuleMap() {} - void SignalContext::DumpAllRegisters(void *context) {} const char *DescribeSignalOrException(int signo) { UNIMPLEMENTED(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index 53a537d398475..6c02b331daffd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -491,8 +491,6 @@ 
void DumpProcessMap() { } #endif -void PrintModuleMap() { } - void DisableCoreDumperIfNecessary() { // Do nothing. } diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_report.cpp index 4892c446c104b..968c7b97553c6 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_report.cpp @@ -386,7 +386,8 @@ void PrintReport(const ReportDesc *rep) { ReportErrorSummary(rep_typ_str, frame->info); } - if (common_flags()->print_module_map == 2) PrintModuleMap(); + if (common_flags()->print_module_map == 2) + DumpProcessMap(); Printf("==================\n"); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 7b37ed50681e6..3d721eb95a2cf 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -446,7 +446,8 @@ void MaybeSpawnBackgroundThread() { int Finalize(ThreadState *thr) { bool failed = false; - if (common_flags()->print_module_map == 1) PrintModuleMap(); + if (common_flags()->print_module_map == 1) + DumpProcessMap(); if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1) SleepForMillis(flags()->atexit_sleep_ms); From 8d9466a3850b8f213b817075ede0fdec8526b7a7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 7 Oct 2020 15:15:09 -0700 Subject: [PATCH 088/179] [BlockExtract][NewPM] Port -extract-blocks to NPM Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D89015 --- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Transforms/IPO/BlockExtractor.h | 25 ++++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/BlockExtractor.cpp | 79 ++++++++++++------- llvm/lib/Transforms/IPO/IPO.cpp | 2 +- .../BlockExtractor/extract-blocks.ll | 2 + 7 files changed, 83 insertions(+), 29 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/BlockExtractor.h diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h 
index 920c6913af4b3..00ef0ea44d091 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -83,7 +83,7 @@ void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAAWrapperPassPass(PassRegistry&); -void initializeBlockExtractorPass(PassRegistry &); +void initializeBlockExtractorLegacyPassPass(PassRegistry &); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingLegacyPassPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/IPO/BlockExtractor.h b/llvm/include/llvm/Transforms/IPO/BlockExtractor.h new file mode 100644 index 0000000000000..deeb5ebe23d9a --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/BlockExtractor.h @@ -0,0 +1,25 @@ +//===- BlockExtractor.h - Extracts blocks into their own functions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass extracts the specified basic blocks from the module into their +// own functions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H +#define LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct BlockExtractorPass : PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 923c1bc11a981..199e459e3d6cd 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -84,6 +84,7 @@ #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/BlockExtractor.h" #include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 5a0b37738c09a..0cac7c4607522 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -50,6 +50,7 @@ MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) +MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("globaldce", GlobalDCEPass()) diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 1d1300c6cd1d0..c6e222a096eb1 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -11,10 +11,12 @@ // //===----------------------------------------------------------------------===// +#include 
"llvm/Transforms/IPO/BlockExtractor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -38,13 +40,10 @@ cl::opt BlockExtractorEraseFuncs("extract-blocks-erase-funcs", cl::desc("Erase the existing functions"), cl::Hidden); namespace { -class BlockExtractor : public ModulePass { - SmallVector, 4> GroupsOfBlocks; - bool EraseFunctions; - /// Map a function name to groups of blocks. - SmallVector>, 4> - BlocksByName; - +class BlockExtractor { +public: + BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {} + bool runOnModule(Module &M); void init(const SmallVectorImpl> &GroupsOfBlocksToExtract) { for (const SmallVectorImpl &GroupOfBlocks : @@ -57,11 +56,26 @@ class BlockExtractor : public ModulePass { loadFile(); } +private: + SmallVector, 4> GroupsOfBlocks; + bool EraseFunctions; + /// Map a function name to groups of blocks. + SmallVector>, 4> + BlocksByName; + + void loadFile(); + void splitLandingPadPreds(Function &F); +}; + +class BlockExtractorLegacyPass : public ModulePass { + BlockExtractor BE; + bool runOnModule(Module &M) override; + public: static char ID; - BlockExtractor(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions) - : ModulePass(ID), EraseFunctions(EraseFunctions) { + BlockExtractorLegacyPass(const SmallVectorImpl &BlocksToExtract, + bool EraseFunctions) + : ModulePass(ID), BE(EraseFunctions) { // We want one group per element of the input list. 
SmallVector, 4> MassagedGroupsOfBlocks; for (BasicBlock *BB : BlocksToExtract) { @@ -69,39 +83,38 @@ class BlockExtractor : public ModulePass { NewGroup.push_back(BB); MassagedGroupsOfBlocks.push_back(NewGroup); } - init(MassagedGroupsOfBlocks); + BE.init(MassagedGroupsOfBlocks); } - BlockExtractor(const SmallVectorImpl> - &GroupsOfBlocksToExtract, - bool EraseFunctions) - : ModulePass(ID), EraseFunctions(EraseFunctions) { - init(GroupsOfBlocksToExtract); + BlockExtractorLegacyPass(const SmallVectorImpl> + &GroupsOfBlocksToExtract, + bool EraseFunctions) + : ModulePass(ID), BE(EraseFunctions) { + BE.init(GroupsOfBlocksToExtract); } - BlockExtractor() : BlockExtractor(SmallVector(), false) {} - bool runOnModule(Module &M) override; - -private: - void loadFile(); - void splitLandingPadPreds(Function &F); + BlockExtractorLegacyPass() + : BlockExtractorLegacyPass(SmallVector(), false) {} }; + } // end anonymous namespace -char BlockExtractor::ID = 0; -INITIALIZE_PASS(BlockExtractor, "extract-blocks", +char BlockExtractorLegacyPass::ID = 0; +INITIALIZE_PASS(BlockExtractorLegacyPass, "extract-blocks", "Extract basic blocks from module", false, false) -ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractor(); } +ModulePass *llvm::createBlockExtractorPass() { + return new BlockExtractorLegacyPass(); +} ModulePass *llvm::createBlockExtractorPass( const SmallVectorImpl &BlocksToExtract, bool EraseFunctions) { - return new BlockExtractor(BlocksToExtract, EraseFunctions); + return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions); } ModulePass *llvm::createBlockExtractorPass( const SmallVectorImpl> &GroupsOfBlocksToExtract, bool EraseFunctions) { - return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions); + return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions); } /// Gets all of the blocks specified in the input file. 
@@ -233,3 +246,15 @@ bool BlockExtractor::runOnModule(Module &M) { return Changed; } + +bool BlockExtractorLegacyPass::runOnModule(Module &M) { + return BE.runOnModule(M); +} + +PreservedAnalyses BlockExtractorPass::run(Module &M, + ModuleAnalysisManager &AM) { + BlockExtractor BE(false); + BE.init(SmallVector, 0>()); + return BE.runOnModule(M) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index 4fb9c511d2f55..c9f0d300cbb9c 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -40,7 +40,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeInferFunctionAttrsLegacyPassPass(Registry); initializeInternalizeLegacyPassPass(Registry); initializeLoopExtractorLegacyPassPass(Registry); - initializeBlockExtractorPass(Registry); + initializeBlockExtractorLegacyPassPass(Registry); initializeSingleLoopExtractorPass(Registry); initializeLowerTypeTestsPass(Registry); initializeMergeFunctionsLegacyPassPass(Registry); diff --git a/llvm/test/Transforms/BlockExtractor/extract-blocks.ll b/llvm/test/Transforms/BlockExtractor/extract-blocks.ll index e720953a1e7a6..beee6356decdf 100644 --- a/llvm/test/Transforms/BlockExtractor/extract-blocks.ll +++ b/llvm/test/Transforms/BlockExtractor/extract-blocks.ll @@ -2,6 +2,8 @@ ; RUN: echo 'foo bb20' >> %t ; RUN: opt -S -extract-blocks -extract-blocks-file=%t %s | FileCheck %s --check-prefix=CHECK-NO-ERASE ; RUN: opt -S -extract-blocks -extract-blocks-file=%t -extract-blocks-erase-funcs %s | FileCheck %s --check-prefix=CHECK-ERASE +; RUN: opt -S -passes=extract-blocks -extract-blocks-file=%t %s | FileCheck %s --check-prefix=CHECK-NO-ERASE +; RUN: opt -S -passes=extract-blocks -extract-blocks-file=%t -extract-blocks-erase-funcs %s | FileCheck %s --check-prefix=CHECK-ERASE ; CHECK-NO-ERASE: @foo( ; CHECK-NO-ERASE: @foo.bb9( From 272279a1c0fa4142e599aecdc75ef68720a81607 Mon Sep 17 00:00:00 2001 From: Hafiz Abid 
Qadeer Date: Wed, 21 Oct 2020 20:56:24 +0100 Subject: [PATCH 089/179] [libcxxabi] Stub out 'sleep' call when _LIBCXXABI_HAS_NO_THREADS is defined. While running this test on a bare metal target, I got an error as 'sleep' was not available on that system. As 'sleep' call is not doing anything useful for cases when _LIBCXXABI_HAS_NO_THREADS is defined. This patch puts it under this check. Reviewed By: ldionne Differential Revision: https://reviews.llvm.org/D89871 --- libcxxabi/test/test_exception_storage.pass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxxabi/test/test_exception_storage.pass.cpp b/libcxxabi/test/test_exception_storage.pass.cpp index 5c29c4524a41d..bf423667b7734 100644 --- a/libcxxabi/test/test_exception_storage.pass.cpp +++ b/libcxxabi/test/test_exception_storage.pass.cpp @@ -29,7 +29,9 @@ void *thread_code (void *parm) { std::printf("Got different globals!\n"); *result = (size_t) glob1; +#ifndef _LIBCXXABI_HAS_NO_THREADS sleep ( 1 ); +#endif return parm; } From 1d1217c4ea115ac204f666a31686787503623dfa Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 21 Oct 2020 13:14:44 -0700 Subject: [PATCH 090/179] [test] Fix no-wrap-symbolic-becount.ll under NPM --- llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll index 77c1017edb1a6..af3092e570529 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -scalar-evolution -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -S -passes='print' 2>&1 | FileCheck %s define i32 @test_01(i32 %start, i32* %p, i32* %q) { ; 
CHECK-LABEL: 'test_01' From ba4768c966581658465f7366df9b0811f468a2d7 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Sun, 20 Sep 2020 23:16:08 -0700 Subject: [PATCH 091/179] [c++20] For P0732R2 / P1907R1: Basic frontend support for class types as non-type template parameters. Create a unique TemplateParamObjectDecl instance for each such value, representing the globally unique template parameter object to which the template parameter refers. No IR generation support yet; that will follow in a separate patch. --- clang/include/clang/AST/ASTContext.h | 8 + clang/include/clang/AST/DeclTemplate.h | 73 ++++++++- clang/include/clang/AST/RecursiveASTVisitor.h | 2 + clang/include/clang/Basic/DeclNodes.td | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 10 +- clang/include/clang/Sema/Initialization.h | 21 ++- clang/include/clang/Sema/Sema.h | 3 +- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/AST/ASTContext.cpp | 32 +++- clang/lib/AST/ASTDiagnostic.cpp | 9 +- clang/lib/AST/DeclBase.cpp | 1 + clang/lib/AST/DeclTemplate.cpp | 33 ++++ clang/lib/AST/ExprClassification.cpp | 4 +- clang/lib/AST/ExprConstant.cpp | 23 ++- clang/lib/AST/ItaniumMangle.cpp | 7 +- clang/lib/AST/MicrosoftMangle.cpp | 7 +- clang/lib/AST/StmtPrinter.cpp | 4 + clang/lib/AST/TemplateBase.cpp | 7 + clang/lib/CodeGen/CGDecl.cpp | 1 + clang/lib/CodeGen/CGExpr.cpp | 4 + clang/lib/CodeGen/CGExprConstant.cpp | 3 + clang/lib/CodeGen/CodeGenModule.cpp | 6 + clang/lib/CodeGen/CodeGenModule.h | 4 + clang/lib/Sema/SemaExpr.cpp | 16 +- clang/lib/Sema/SemaInit.cpp | 21 ++- clang/lib/Sema/SemaOverload.cpp | 35 +++-- clang/lib/Sema/SemaTemplate.cpp | 52 ++++++- clang/lib/Sema/SemaTemplateDeduction.cpp | 106 +++++++------ .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 5 + clang/lib/Serialization/ASTCommon.cpp | 1 + clang/lib/Serialization/ASTReaderDecl.cpp | 19 ++- clang/lib/Serialization/ASTWriterDecl.cpp | 7 + clang/test/CXX/drs/dr3xx.cpp | 4 +- clang/test/CXX/temp/temp.param/p8-cxx20.cpp | 28 ++++ 
clang/test/PCH/cxx20-template-args.cpp | 25 +++ clang/test/SemaCXX/cxx17-compat.cpp | 10 ++ .../SemaTemplate/temp_arg_nontype_cxx20.cpp | 146 +++++++++++++++++- clang/tools/libclang/CIndex.cpp | 1 + 38 files changed, 645 insertions(+), 97 deletions(-) create mode 100644 clang/test/CXX/temp/temp.param/p8-cxx20.cpp create mode 100644 clang/test/PCH/cxx20-template-args.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index d8c0b624ef71e..30d910c93022f 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -289,6 +289,9 @@ class ASTContext : public RefCountedBase { /// Mapping from GUIDs to the corresponding MSGuidDecl. mutable llvm::FoldingSet MSGuidDecls; + /// Mapping from APValues to the corresponding TemplateParamObjects. + mutable llvm::FoldingSet TemplateParamObjectDecls; + /// A cache mapping a string value to a StringLiteral object with the same /// value. /// @@ -2868,6 +2871,11 @@ class ASTContext : public RefCountedBase { /// GUID value. MSGuidDecl *getMSGuidDecl(MSGuidDeclParts Parts) const; + /// Return the template parameter object of the given type with the given + /// value. + TemplateParamObjectDecl *getTemplateParamObjectDecl(QualType T, + const APValue &V) const; + /// Parses the target attributes passed in, and returns only the ones that are /// valid feature names. ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD) const; diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 5f35257390912..7a175db8cb160 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -3226,7 +3226,7 @@ class VarTemplateDecl : public RedeclarableTemplateDecl { static bool classofKind(Kind K) { return K == VarTemplate; } }; -// \brief Declaration of a C++2a concept. +/// Declaration of a C++2a concept. 
class ConceptDecl : public TemplateDecl, public Mergeable { protected: Expr *ConstraintExpr; @@ -3255,6 +3255,9 @@ class ConceptDecl : public TemplateDecl, public Mergeable { return isa(getTemplateParameters()->getParam(0)); } + ConceptDecl *getCanonicalDecl() override { return getFirstDecl(); } + const ConceptDecl *getCanonicalDecl() const { return getFirstDecl(); } + // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == Concept; } @@ -3264,6 +3267,74 @@ class ConceptDecl : public TemplateDecl, public Mergeable { friend class ASTDeclWriter; }; +/// A template parameter object. +/// +/// Template parameter objects represent values of class type used as template +/// arguments. There is one template parameter object for each such distinct +/// value used as a template argument across the program. +/// +/// \code +/// struct A { int x, y; }; +/// template struct S; +/// S s1; +/// S s2; // same type, argument is same TemplateParamObjectDecl. +/// \endcode +class TemplateParamObjectDecl : public ValueDecl, + public Mergeable, + public llvm::FoldingSetNode { +private: + /// The value of this template parameter object. + APValue Value; + + TemplateParamObjectDecl(DeclContext *DC, QualType T, const APValue &V) + : ValueDecl(TemplateParamObject, DC, SourceLocation(), DeclarationName(), + T), + Value(V) {} + + static TemplateParamObjectDecl *Create(const ASTContext &C, QualType T, + const APValue &V); + static TemplateParamObjectDecl *CreateDeserialized(ASTContext &C, + unsigned ID); + + /// Only ASTContext::getTemplateParamObjectDecl and deserialization + /// create these. + friend class ASTContext; + friend class ASTReader; + friend class ASTDeclReader; + +public: + /// Print this template parameter object in a human-readable format. + void printName(llvm::raw_ostream &OS) const override; + + /// Print this object as an equivalent expression. 
+ void printAsExpr(llvm::raw_ostream &OS) const; + + /// Print this object as an initializer suitable for a variable of the + /// object's type. + void printAsInit(llvm::raw_ostream &OS) const; + + const APValue &getValue() const { return Value; } + + static void Profile(llvm::FoldingSetNodeID &ID, QualType T, + const APValue &V) { + ID.AddPointer(T.getCanonicalType().getAsOpaquePtr()); + V.profile(ID); + } + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getType(), getValue()); + } + + TemplateParamObjectDecl *getCanonicalDecl() override { + return getFirstDecl(); + } + const TemplateParamObjectDecl *getCanonicalDecl() const { + return getFirstDecl(); + } + + static bool classof(const Decl *D) { return classofKind(D->getKind()); } + static bool classofKind(Kind K) { return K == TemplateParamObject; } +}; + inline NamedDecl *getAsNamedDecl(TemplateParameter P) { if (auto *PD = P.dyn_cast()) return PD; diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 6f07b92f25323..5e83cded06520 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1970,6 +1970,8 @@ DEF_TRAVERSE_DECL(MSPropertyDecl, { TRY_TO(TraverseDeclaratorHelper(D)); }) DEF_TRAVERSE_DECL(MSGuidDecl, {}) +DEF_TRAVERSE_DECL(TemplateParamObjectDecl, {}) + DEF_TRAVERSE_DECL(FieldDecl, { TRY_TO(TraverseDeclaratorHelper(D)); if (D->isBitField()) diff --git a/clang/include/clang/Basic/DeclNodes.td b/clang/include/clang/Basic/DeclNodes.td index 866988ee3f015..4771a3549426b 100644 --- a/clang/include/clang/Basic/DeclNodes.td +++ b/clang/include/clang/Basic/DeclNodes.td @@ -41,6 +41,7 @@ def Named : DeclNode; def OMPDeclareReduction : DeclNode, DeclContext; def OMPDeclareMapper : DeclNode, DeclContext; def MSGuid : DeclNode; + def TemplateParamObject : DeclNode; def Declarator : DeclNode; def Field : DeclNode; def ObjCIvar : DeclNode; diff --git 
a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ee942afd440ae..641d3e73905ef 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1994,8 +1994,8 @@ def err_destructor_template : Error< // C++ initialization def err_init_conversion_failed : Error< - "cannot initialize %select{a variable|a parameter|return object|" - "statement expression result|an " + "cannot initialize %select{a variable|a parameter|template parameter|" + "return object|statement expression result|an " "exception object|a member subobject|an array element|a new value|a value|a " "base class|a constructor delegation|a vector element|a block element|a " "block element|a complex element|a lambda capture|a compound literal " @@ -2137,7 +2137,7 @@ def warn_unsequenced_mod_use : Warning< "unsequenced modification and access to %0">, InGroup; def select_initialized_entity_kind : TextSubstitution< - "%select{copying variable|copying parameter|" + "%select{copying variable|copying parameter|initializing template parameter|" "returning object|initializing statement expression result|" "throwing object|copying member subobject|copying array element|" "allocating object|copying temporary|initializing base subobject|" @@ -4492,6 +4492,10 @@ def note_not_structural_rvalue_ref_field : Note< def note_not_structural_subobject : Note< "%0 is not a structural type because it has a " "%select{non-static data member|base class}1 of non-structural type %2">; +def warn_cxx17_compat_template_nontype_parm_type : Warning< + "non-type template parameter of type %0 is incompatible with " + "C++ standards before C++20">, + DefaultIgnore, InGroup; def warn_cxx14_compat_template_nontype_parm_auto_type : Warning< "non-type template parameters declared with %0 are incompatible with C++ " "standards before C++17">, diff --git a/clang/include/clang/Sema/Initialization.h 
b/clang/include/clang/Sema/Initialization.h index ca9e0a198cb91..6976e7c95c8b2 100644 --- a/clang/include/clang/Sema/Initialization.h +++ b/clang/include/clang/Sema/Initialization.h @@ -55,6 +55,9 @@ class alignas(8) InitializedEntity { /// The entity being initialized is a function parameter. EK_Parameter, + /// The entity being initialized is a non-type template parameter. + EK_TemplateParameter, + /// The entity being initialized is the result of a function call. EK_Result, @@ -175,7 +178,8 @@ class alignas(8) InitializedEntity { }; union { - /// When Kind == EK_Variable, EK_Member or EK_Binding, the variable. + /// When Kind == EK_Variable, EK_Member, EK_Binding, or + /// EK_TemplateParameter, the variable, binding, or template parameter. VD Variable; /// When Kind == EK_RelatedResult, the ObjectiveC method where @@ -281,6 +285,17 @@ class alignas(8) InitializedEntity { return Entity; } + /// Create the initialization entity for a template parameter. + static InitializedEntity + InitializeTemplateParameter(QualType T, NonTypeTemplateParmDecl *Param) { + InitializedEntity Entity; + Entity.Kind = EK_TemplateParameter; + Entity.Type = T; + Entity.Parent = nullptr; + Entity.Variable = {Param, false, false}; + return Entity; + } + /// Create the initialization entity for the result of a function. static InitializedEntity InitializeResult(SourceLocation ReturnLoc, QualType Type, bool NRVO) { @@ -441,6 +456,10 @@ class alignas(8) InitializedEntity { getKind() == EK_Parameter_CF_Audited); } + bool isParamOrTemplateParamKind() const { + return isParameterKind() || getKind() == EK_TemplateParameter; + } + /// Determine whether this initialization consumes the /// parameter. 
bool isParameterConsumed() const { diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 7ced3d9a7bd4a..18f115003f389 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3369,7 +3369,8 @@ class Sema final { ExprResult CheckConvertedConstantExpression(Expr *From, QualType T, llvm::APSInt &Value, CCEKind CCE); ExprResult CheckConvertedConstantExpression(Expr *From, QualType T, - APValue &Value, CCEKind CCE); + APValue &Value, CCEKind CCE, + NamedDecl *Dest = nullptr); /// Abstract base class used to perform a contextual implicit /// conversion from an expression to any type passing a filter. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index c6f9f1d1a08f4..a2ae032ed5b8c 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1281,6 +1281,9 @@ class TypeIdx { /// A MSGuidDecl record. DECL_MS_GUID, + /// A TemplateParamObjectDecl record. + DECL_TEMPLATE_PARAM_OBJECT, + /// A VarDecl record. DECL_VAR, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 32bb3f991d959..4f3566a66962c 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -4870,9 +4870,16 @@ TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) { Arg = TemplateArgument(ArgType); } else if (auto *NTTP = dyn_cast(Param)) { + QualType T = + NTTP->getType().getNonPackExpansionType().getNonLValueExprType(*this); + // For class NTTPs, ensure we include the 'const' so the type matches that + // of a real template argument. + // FIXME: It would be more faithful to model this as something like an + // lvalue-to-rvalue conversion applied to a const-qualified lvalue. 
+ if (T->isRecordType()) + T.addConst(); Expr *E = new (*this) DeclRefExpr( - *this, NTTP, /*enclosing*/ false, - NTTP->getType().getNonPackExpansionType().getNonLValueExprType(*this), + *this, NTTP, /*enclosing*/ false, T, Expr::getValueKindForType(NTTP->getType()), NTTP->getLocation()); if (NTTP->isParameterPack()) @@ -10962,6 +10969,27 @@ ASTContext::getMSGuidDecl(MSGuidDecl::Parts Parts) const { return New; } +TemplateParamObjectDecl * +ASTContext::getTemplateParamObjectDecl(QualType T, const APValue &V) const { + assert(T->isRecordType() && "template param object of unexpected type"); + + // C++ [temp.param]p8: + // [...] a static storage duration object of type 'const T' [...] + T.addConst(); + + llvm::FoldingSetNodeID ID; + TemplateParamObjectDecl::Profile(ID, T, V); + + void *InsertPos; + if (TemplateParamObjectDecl *Existing = + TemplateParamObjectDecls.FindNodeOrInsertPos(ID, InsertPos)) + return Existing; + + TemplateParamObjectDecl *New = TemplateParamObjectDecl::Create(*this, T, V); + TemplateParamObjectDecls.InsertNode(New, InsertPos); + return New; +} + bool ASTContext::AtomicUsesUnsupportedLibcall(const AtomicExpr *E) const { const llvm::Triple &T = getTargetInfo().getTriple(); if (!T.isOSDarwin()) diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp index 99ce46e83123e..2bc731717b982 100644 --- a/clang/lib/AST/ASTDiagnostic.cpp +++ b/clang/lib/AST/ASTDiagnostic.cpp @@ -1834,7 +1834,14 @@ class TemplateDiff { if (VD) { if (AddressOf) OS << "&"; - OS << VD->getName(); + else if (auto *TPO = dyn_cast(VD)) { + // FIXME: Diffing the APValue would be neat. + // FIXME: Suppress this and use the full name of the declaration if the + // parameter is a pointer or reference. 
+ TPO->printAsInit(OS); + return; + } + VD->printName(OS); return; } diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index f2502c327a118..0656efae5489f 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -835,6 +835,7 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) { case ExternCContext: case Decomposition: case MSGuid: + case TemplateParamObject: case UsingDirective: case BuiltinTemplate: diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index d99a9c19c506e..9918377070c3a 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -1431,3 +1431,36 @@ void TypeConstraint::print(llvm::raw_ostream &OS, PrintingPolicy Policy) const { OS << ">"; } } + +TemplateParamObjectDecl *TemplateParamObjectDecl::Create(const ASTContext &C, + QualType T, + const APValue &V) { + DeclContext *DC = C.getTranslationUnitDecl(); + auto *TPOD = new (C, DC) TemplateParamObjectDecl(DC, T, V); + C.addDestruction(&TPOD->Value); + return TPOD; +} + +TemplateParamObjectDecl * +TemplateParamObjectDecl::CreateDeserialized(ASTContext &C, unsigned ID) { + auto *TPOD = new (C, ID) TemplateParamObjectDecl(nullptr, QualType(), APValue()); + C.addDestruction(&TPOD->Value); + return TPOD; +} + +void TemplateParamObjectDecl::printName(llvm::raw_ostream &OS) const { + OS << "