From 4d855bce8fc2cfb3ef46d90f85a2f5b370e3b6e5 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Fri, 29 Oct 2021 11:14:00 -0700 Subject: [PATCH] Loop refactoring and commenting improvements - Remove unneeded FIRST concept in loop table; it was always equal to TOP. - Rename optMarkLoopsBlocks to optScaleLoopBlocks to more accurately describe what it does. More consistently report block scaling in the dump - Create optMarkLoopHeads. This was refactored out of fgRemoveUnreachableBlocks so it can be called in a more logical location (at the beginning of optFindLoops), and only does one thing. - fgMakeOutgoingStructArgCopy: remove unused `argIndex` argument; reorder calls to fgMightHaveLoop. - Update and write a bunch of comments; convert away from `/* */` style comments. --- src/coreclr/jit/compiler.cpp | 9 +- src/coreclr/jit/compiler.h | 49 +-- src/coreclr/jit/compiler.hpp | 38 +- src/coreclr/jit/fgbasic.cpp | 29 +- src/coreclr/jit/fgdiagnostic.cpp | 4 +- src/coreclr/jit/fgopt.cpp | 66 +-- src/coreclr/jit/fgprofile.cpp | 2 +- src/coreclr/jit/flowgraph.cpp | 4 +- src/coreclr/jit/loopcloning.cpp | 7 +- src/coreclr/jit/lsra.cpp | 4 +- src/coreclr/jit/morph.cpp | 28 +- src/coreclr/jit/optimizer.cpp | 669 ++++++++++++++++--------------- 12 files changed, 437 insertions(+), 472 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e35cf807734ed..a75d1618fd390 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4877,14 +4877,12 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_COMPUTE_REACHABILITY, &Compiler::fgComputeReachability); - // Discover and classify natural loops - // (e.g. mark iterative loops as such). Also marks loop blocks - // and sets bbWeight to the loop nesting levels + // Discover and classify natural loops (e.g. mark iterative loops as such). Also marks loop blocks + // and sets bbWeight to the loop nesting levels. // DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoops); - // Clone loops with optimization opportunities, and - // choose one based on dynamic condition evaluation. + // Clone loops with optimization opportunities, and choose one based on dynamic condition evaluation. // DoPhase(this, PHASE_CLONE_LOOPS, &Compiler::optCloneLoops); @@ -8675,7 +8673,6 @@ void cLoop(Compiler* comp, Compiler::LoopDsc* loop) static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called printf("===================================================================== Loop %u\n", sequenceNumber++); printf("HEAD " FMT_BB "\n", loop->lpHead->bbNum); - printf("FIRST " FMT_BB "\n", loop->lpFirst->bbNum); printf("TOP " FMT_BB "\n", loop->lpTop->bbNum); printf("ENTRY " FMT_BB "\n", loop->lpEntry->bbNum); if (loop->lpExitCnt == 1) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 18fce48d72b34..647dd8c8babcd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6324,10 +6324,7 @@ class Compiler GenTreeCall* fgMorphArgs(GenTreeCall* call); GenTreeArgList* fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac); - void fgMakeOutgoingStructArgCopy(GenTreeCall* call, - GenTreeCall::Use* args, - unsigned argIndex, - CORINFO_CLASS_HANDLE copyBlkClass); + void fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* args, CORINFO_CLASS_HANDLE copyBlkClass); GenTree* fgMorphLocalVar(GenTree* tree, bool forceRemorph); @@ -6714,18 +6711,16 @@ class Compiler // bbNext order) sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered" // in bbNext order; we use comparisons on the bbNum to decide order.) // The blocks that define the body are - // first <= top <= entry <= bottom . + // top <= entry <= bottom // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a // single 'head' block. The meanings of these blocks are given in the definitions below. Also see the picture at // Compiler::optFindNaturalLoops(). struct LoopDsc { - BasicBlock* lpHead; // HEAD of the loop (not part of the looping of the loop) -- has ENTRY as a successor. - BasicBlock* lpFirst; // FIRST block (in bbNext order) reachable within this loop. (May be part of a nested - // loop, but not the outer loop.) - BasicBlock* lpTop; // loop TOP (the back edge from lpBottom reaches here) (in most cases FIRST and TOP are the - // same) - BasicBlock* lpEntry; // the ENTRY in the loop (in most cases TOP or BOTTOM) + BasicBlock* lpHead; // HEAD of the loop (not part of the looping of the loop) -- has ENTRY as a successor. + BasicBlock* lpTop; // loop TOP (the back edge from lpBottom reaches here). Lexically first block (in bbNext + // order) reachable in this loop. + BasicBlock* lpEntry; // the ENTRY in the loop (in most cases TOP or BOTTOM) BasicBlock* lpBottom; // loop BOTTOM (from here we have a back edge to the TOP) BasicBlock* lpExit; // if a single exit loop this is the EXIT (in most cases BOTTOM) @@ -6826,51 +6821,50 @@ class Compiler // Returns "true" iff "*this" contains the blk. bool lpContains(BasicBlock* blk) const { - return lpFirst->bbNum <= blk->bbNum && blk->bbNum <= lpBottom->bbNum; + return lpTop->bbNum <= blk->bbNum && blk->bbNum <= lpBottom->bbNum; } // Returns "true" iff "*this" (properly) contains the range [first, bottom] (allowing firsts // to be equal, but requiring bottoms to be different.) bool lpContains(BasicBlock* first, BasicBlock* bottom) const { - return lpFirst->bbNum <= first->bbNum && bottom->bbNum < lpBottom->bbNum; + return lpTop->bbNum <= first->bbNum && bottom->bbNum < lpBottom->bbNum; } // Returns "true" iff "*this" (properly) contains "lp2" (allowing firsts to be equal, but requiring // bottoms to be different.) bool lpContains(const LoopDsc& lp2) const { - return lpContains(lp2.lpFirst, lp2.lpBottom); + return lpContains(lp2.lpTop, lp2.lpBottom); } // Returns "true" iff "*this" is (properly) contained by the range [first, bottom] // (allowing firsts to be equal, but requiring bottoms to be different.) bool lpContainedBy(BasicBlock* first, BasicBlock* bottom) const { - return first->bbNum <= lpFirst->bbNum && lpBottom->bbNum < bottom->bbNum; + return first->bbNum <= lpTop->bbNum && lpBottom->bbNum < bottom->bbNum; } // Returns "true" iff "*this" is (properly) contained by "lp2" // (allowing firsts to be equal, but requiring bottoms to be different.) bool lpContainedBy(const LoopDsc& lp2) const { - return lpContains(lp2.lpFirst, lp2.lpBottom); + return lpContains(lp2.lpTop, lp2.lpBottom); } // Returns "true" iff "*this" is disjoint from the range [top, bottom]. bool lpDisjoint(BasicBlock* first, BasicBlock* bottom) const { - return bottom->bbNum < lpFirst->bbNum || lpBottom->bbNum < first->bbNum; + return bottom->bbNum < lpTop->bbNum || lpBottom->bbNum < first->bbNum; } // Returns "true" iff "*this" is disjoint from "lp2". bool lpDisjoint(const LoopDsc& lp2) const { - return lpDisjoint(lp2.lpFirst, lp2.lpBottom); + return lpDisjoint(lp2.lpTop, lp2.lpBottom); } // Returns "true" iff the loop is well-formed (see code for defn). bool lpWellFormed() const { - return lpFirst->bbNum <= lpTop->bbNum && lpTop->bbNum <= lpEntry->bbNum && - lpEntry->bbNum <= lpBottom->bbNum && + return lpTop->bbNum <= lpEntry->bbNum && lpEntry->bbNum <= lpBottom->bbNum && (lpHead->bbNum < lpTop->bbNum || lpHead->bbNum > lpBottom->bbNum); } @@ -6878,17 +6872,17 @@ class Compiler // blocks in a loop, e.g.: // for (BasicBlock* const block : loop->LoopBlocks()) ... // Currently, the loop blocks are expected to be in linear, lexical, `bbNext` order - // from `lpFirst` through `lpBottom`, inclusive. All blocks in this range are considered + // from `lpTop` through `lpBottom`, inclusive. All blocks in this range are considered // to be part of the loop. // BasicBlockRangeList LoopBlocks() const { - return BasicBlockRangeList(lpFirst, lpBottom); + return BasicBlockRangeList(lpTop, lpBottom); } }; protected: - bool fgMightHaveLoop(); // returns true if there are any backedges + bool fgMightHaveLoop(); // returns true if there are any back edges bool fgHasLoops; // True if this method has any loops, set in fgComputeReachability public: @@ -6901,7 +6895,6 @@ class Compiler #endif // DEBUG bool optRecordLoop(BasicBlock* head, - BasicBlock* first, BasicBlock* top, BasicBlock* entry, BasicBlock* bottom, @@ -6917,7 +6910,6 @@ class Compiler #ifdef DEBUG void optPrintLoopInfo(unsigned loopNum, BasicBlock* lpHead, - BasicBlock* lpFirst, BasicBlock* lpTop, BasicBlock* lpEntry, BasicBlock* lpBottom, @@ -6930,9 +6922,12 @@ class Compiler void optCheckPreds(); #endif + // Determine if there are any potential loops, and set BBF_LOOP_HEAD on potential loop heads. + void optMarkLoopHeads(); + void optSetBlockWeights(); - void optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk); + void optScaleLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk); void optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk); @@ -8290,7 +8285,7 @@ class Compiler static fgWalkPreFn CountSharedStaticHelper; static bool IsSharedStaticHelper(GenTree* tree); - static bool IsGcSafePoint(GenTree* tree); + static bool IsGcSafePoint(GenTreeCall* call); static CORINFO_FIELD_HANDLE eeFindJitDataOffs(unsigned jitDataOffs); // returns true/false if 'field' is a Jit Data offset diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 7fec5719abc36..6145fa5395829 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -3674,34 +3674,30 @@ inline bool Compiler::IsSharedStaticHelper(GenTree* tree) return result1; } -inline bool Compiler::IsGcSafePoint(GenTree* tree) +inline bool Compiler::IsGcSafePoint(GenTreeCall* call) { - if (tree->IsCall()) + if (!call->IsFastTailCall()) { - GenTreeCall* call = tree->AsCall(); - if (!call->IsFastTailCall()) + if (call->IsUnmanaged() && call->IsSuppressGCTransition()) { - if (call->IsUnmanaged() && call->IsSuppressGCTransition()) - { - // Both an indirect and user calls can be unmanaged - // and have a request to suppress the GC transition so - // the check is done prior to the separate handling of - // indirect and user calls. - return false; - } - else if (call->gtCallType == CT_INDIRECT) + // Both an indirect and user calls can be unmanaged + // and have a request to suppress the GC transition so + // the check is done prior to the separate handling of + // indirect and user calls. + return false; + } + else if (call->gtCallType == CT_INDIRECT) + { + return true; + } + else if (call->gtCallType == CT_USER_FUNC) + { + if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0) { return true; } - else if (call->gtCallType == CT_USER_FUNC) - { - if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0) - { - return true; - } - } - // otherwise we have a CT_HELPER } + // otherwise we have a CT_HELPER } return false; diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 57683ab1fb15e..0f4631afd11ad 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -4656,8 +4656,8 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) if (fgDomsComputed && fgReachable(succBlock, block)) { - /* Mark all the reachable blocks between 'succBlock' and 'block', excluding 'block' */ - optMarkLoopBlocks(succBlock, block, true); + // Mark all the reachable blocks between 'succBlock' and 'bPrev' + optScaleLoopBlocks(succBlock, bPrev); } } else if (succBlock->isLoopHead() && bPrev && (succBlock->bbNum <= bPrev->bbNum)) @@ -4665,8 +4665,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) skipUnmarkLoop = true; } - noway_assert(succBlock); - // If this is the first Cold basic block update fgFirstColdBlock if (block == fgFirstColdBlock) { @@ -4709,12 +4707,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) succBlock->bbRefs++; } - fgUnlinkBlock(block); - - /* mark the block as removed and set the change flag */ - - block->bbFlags |= BBF_REMOVED; - /* Update bbRefs and bbPreds. * All blocks jumping to 'block' now jump to 'succBlock'. * First, remove 'block' from the predecessor list of succBlock. @@ -4804,6 +4796,9 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) break; } } + + fgUnlinkBlock(block); + block->bbFlags |= BBF_REMOVED; } if (bPrev != nullptr) @@ -5564,7 +5559,17 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r return bLast; } -// return true if there is a possibility that the method has a loop (a backedge is present) +//------------------------------------------------------------------------ +// fgMightHaveLoop: return true if there is a possibility that the method has a loop (a back edge is present). +// This function doesn't depend on any previous loop computations, including predecessors. It looks for any +// lexical back edge to a block previously seen in a forward walk of the block list. +// +// As it walks all blocks and all successors of each block (including EH successors), it is not cheap. +// It returns as soon as any possible loop is discovered. +// +// Return Value: +// true if there might be a loop +// bool Compiler::fgMightHaveLoop() { // Don't use a BlockSet for this temporary bitset of blocks: we don't want to have to call EnsureBasicBlockEpoch() @@ -5577,7 +5582,7 @@ bool Compiler::fgMightHaveLoop() { BitVecOps::AddElemD(&blockVecTraits, blocksSeen, block->bbNum); - for (BasicBlock* succ : block->GetAllSuccs(this)) + for (BasicBlock* const succ : block->GetAllSuccs(this)) { if (BitVecOps::IsMember(&blockVecTraits, blocksSeen, succ->bbNum)) { diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index c5b6490b10b97..ac4850e27a521 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -1632,7 +1632,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) } } - // Add regions for the loops. Note that loops are assumed to be contiguous from `lpFirst` to `lpBottom`. + // Add regions for the loops. Note that loops are assumed to be contiguous from `lpTop` to `lpBottom`. if (includeLoops) { @@ -1645,7 +1645,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) continue; } sprintf_s(name, sizeof(name), FMT_LP, loopNum); - rgnGraph.Insert(name, RegionGraph::RegionType::Loop, loop.lpFirst, loop.lpBottom); + rgnGraph.Insert(name, RegionGraph::RegionType::Loop, loop.lpTop, loop.lpBottom); } } diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 0196a72356967..c5988836e4e68 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -364,17 +364,11 @@ void Compiler::fgComputeEnterBlocksSet() // Assumptions: // The reachability sets must be computed and valid. // -// Notes: -// Sets `fgHasLoops` if there are any loops in the function. -// Sets `BBF_LOOP_HEAD` flag on a block if that block is the target of a backward branch and the block can -// reach the source of the branch. -// bool Compiler::fgRemoveUnreachableBlocks() { assert(!fgCheapPredsValid); assert(fgReachabilitySetsValid); - bool hasLoops = false; bool hasUnreachableBlocks = false; bool changed = false; @@ -384,7 +378,7 @@ bool Compiler::fgRemoveUnreachableBlocks() /* Internal throw blocks are also reachable */ if (fgIsThrowHlpBlk(block)) { - goto SKIP_BLOCK; + continue; } else if (block == genReturnBB) { @@ -392,20 +386,20 @@ bool Compiler::fgRemoveUnreachableBlocks() // For example, in VSW 364383, // the profiler hookup needs to have the "void GT_RETURN" statement // to properly set the info.compProfilerCallback flag. - goto SKIP_BLOCK; + continue; } else { // If any of the entry blocks can reach this block, then we skip it. if (!BlockSetOps::IsEmptyIntersection(this, fgEnterBlks, block->bbReach)) { - goto SKIP_BLOCK; + continue; } #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM) if (!BlockSetOps::IsEmptyIntersection(this, fgAlwaysBlks, block->bbReach)) { - goto SKIP_BLOCK; + continue; } #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM) } @@ -451,54 +445,22 @@ bool Compiler::fgRemoveUnreachableBlocks() hasUnreachableBlocks = true; changed = true; } - continue; - - SKIP_BLOCK:; - - if (block->bbJumpKind == BBJ_RETURN) - { - continue; - } - - // Set BBF_LOOP_HEAD if we have backwards branches to this block. - - unsigned blockNum = block->bbNum; - for (BasicBlock* const predBlock : block->PredBlocks()) - { - if (blockNum <= predBlock->bbNum) - { - if (predBlock->bbJumpKind == BBJ_CALLFINALLY) - { - continue; - } - - /* If block can reach predBlock then we have a loop head */ - if (BlockSetOps::IsMember(this, predBlock->bbReach, blockNum)) - { - hasLoops = true; - - /* Set the BBF_LOOP_HEAD flag */ - block->bbFlags |= BBF_LOOP_HEAD; - break; - } - } - } } - fgHasLoops = hasLoops; - if (hasUnreachableBlocks) { // Now remove the unreachable blocks for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) { - // If we mark the block with BBF_REMOVED then - // we need to call fgRemovedBlock() on it + // If we marked a block with BBF_REMOVED then we need to call fgRemoveBlock() on it if (block->bbFlags & BBF_REMOVED) { fgRemoveBlock(block, true); + // TODO: couldn't we have fgRemoveBlock() return the block after the (last)one removed + // so we don't need the code below? + // When we have a BBJ_CALLFINALLY, BBJ_ALWAYS pair; fgRemoveBlock will remove // both blocks, so we must advance 1 extra place in the block list // @@ -521,9 +483,6 @@ bool Compiler::fgRemoveUnreachableBlocks() // Also, compute the list of return blocks `fgReturnBlocks` and set of enter blocks `fgEnterBlks`. // Delete unreachable blocks. // -// Via the call to `fgRemoveUnreachableBlocks`, determine if the flow graph has loops and set 'fgHasLoops' -// accordingly. Set the BBF_LOOP_HEAD flag on the block target of backwards branches. -// // Assumptions: // Assumes the predecessor lists are computed and correct. // @@ -591,8 +550,6 @@ void Compiler::fgComputeReachability() // // Use reachability information to delete unreachable blocks. - // Also, determine if the flow graph has loops and set 'fgHasLoops' accordingly. - // Set the BBF_LOOP_HEAD flag on the block target of backwards branches. // changed = fgRemoveUnreachableBlocks(); @@ -2239,13 +2196,6 @@ void Compiler::fgUpdateLoopsAfterCompacting(BasicBlock* block, BasicBlock* bNext optLoopTable[loopNum].lpEntry = block; } - /* Check the loop's first block */ - - if (optLoopTable[loopNum].lpFirst == bNext) - { - optLoopTable[loopNum].lpFirst = block; - } - /* Check the loop top */ if (optLoopTable[loopNum].lpTop == bNext) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index cc08d29bcb537..28ff5ac4530cb 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -3717,7 +3717,7 @@ bool Compiler::fgProfileWeightsEqual(weight_t weight1, weight_t weight2) } //------------------------------------------------------------------------ -// fgProfileWeightsConsistentEqual: check if two profile weights are within +// fgProfileWeightsConsistent: check if two profile weights are within // some small percentage of one another. // // Arguments: diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index b33351cd391b9..59b0ec7f93915 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -1362,7 +1362,7 @@ GenTree* Compiler::fgDoNormalizeOnStore(GenTree* tree) * execute a call or not. */ -inline void Compiler::fgLoopCallTest(BasicBlock* srcBB, BasicBlock* dstBB) +void Compiler::fgLoopCallTest(BasicBlock* srcBB, BasicBlock* dstBB) { /* Bail if this is not a backward edge */ @@ -1436,7 +1436,7 @@ void Compiler::fgLoopCallMark() * Note the fact that the given block is a loop header. */ -inline void Compiler::fgMarkLoopHead(BasicBlock* block) +void Compiler::fgMarkLoopHead(BasicBlock* block) { #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 5bc02e47a5037..a1c71f932f5a6 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1790,10 +1790,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) LoopDsc& loop = optLoopTable[loopInd]; - JITDUMP("\nCloning loop " FMT_LP ": [head: " FMT_BB ", first: " FMT_BB ", top: " FMT_BB ", entry: " FMT_BB - ", bottom: " FMT_BB ", child: " FMT_LP "].\n", - loopInd, loop.lpHead->bbNum, loop.lpFirst->bbNum, loop.lpTop->bbNum, loop.lpEntry->bbNum, - loop.lpBottom->bbNum, loop.lpChild); + JITDUMP("\nCloning loop " FMT_LP ": [head: " FMT_BB ", top: " FMT_BB ", entry: " FMT_BB ", bottom: " FMT_BB + ", child: " FMT_LP "].\n", + loopInd, loop.lpHead->bbNum, loop.lpTop->bbNum, loop.lpEntry->bbNum, loop.lpBottom->bbNum, loop.lpChild); // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). unsigned depth = optLoopDepth(loopInd); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9283919ae5ef6..39bd3a0cb783c 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1918,8 +1918,8 @@ void LinearScan::identifyCandidates() } #endif - JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops, - (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr)); + JITDUMP("floatVarCount = %d; hasLoops = %s, singleExit = %s\n", floatVarCount, dspBool(compiler->fgHasLoops), + dspBool(compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr)); // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves. if (floatVarCount > 6 && compiler->fgHasLoops && diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3862f493cbf48..c8c0ae11eda14 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3829,7 +3829,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) if (copyBlkClass != NO_CLASS_HANDLE) { - fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); + fgMakeOutgoingStructArgCopy(call, args, copyBlkClass); } if (argx->gtOper == GT_MKREFANY) @@ -4764,16 +4764,11 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) // Arguments: // call - call being processed // args - args for the call -/// argIndex - arg being processed // copyBlkClass - class handle for the struct // -// Return value: -// tree that computes address of the outgoing arg +// The arg is updated if necessary with the copy. // -void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, - GenTreeCall::Use* args, - unsigned argIndex, - CORINFO_CLASS_HANDLE copyBlkClass) +void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* args, CORINFO_CLASS_HANDLE copyBlkClass) { GenTree* argx = args->GetNode(); noway_assert(argx->gtOper != GT_MKREFANY); @@ -4808,10 +4803,11 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, // * (may not copy) if there is exactly one use of the local in the method, // and the call is not in loop, this is a last use. // - const bool isTailCallLastUse = call->IsTailCall(); - const bool isCallLastUse = (totalAppearances == 1) && !fgMightHaveLoop(); - const bool isNoReturnLastUse = (totalAppearances == 1) && call->IsNoReturn(); - if (isTailCallLastUse || isCallLastUse || isNoReturnLastUse) + // fgMightHaveLoop() is expensive; check it last, only if necessary. + // + if (call->IsTailCall() || // + ((totalAppearances == 1) && call->IsNoReturn()) || // + ((totalAppearances == 1) && !fgMightHaveLoop())) { args->SetNode(lcl); assert(argEntry->GetNode() == lcl); @@ -4915,8 +4911,6 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, args->SetNode(arg); call->fgArgInfo->EvalToTmp(argEntry, tmp, arg); - - return; } #ifdef TARGET_ARM @@ -15383,16 +15377,16 @@ bool Compiler::fgFoldConditional(BasicBlock* block) optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; #if FEATURE_LOOP_ALIGN - optLoopTable[loopNum].lpFirst->bbFlags &= ~BBF_LOOP_ALIGN; + optLoopTable[loopNum].lpTop->bbFlags &= ~BBF_LOOP_ALIGN; JITDUMP("Removing LOOP_ALIGN flag from bogus loop in " FMT_BB "\n", - optLoopTable[loopNum].lpFirst->bbNum); + optLoopTable[loopNum].lpTop->bbNum); #endif #ifdef DEBUG if (verbose) { printf("Removing loop " FMT_LP " (from " FMT_BB " to " FMT_BB ")\n\n", loopNum, - optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum); + optLoopTable[loopNum].lpTop->bbNum, optLoopTable[loopNum].lpBottom->bbNum); } #endif } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index b760e5d201e85..e3f0218bd7ba8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -78,13 +78,14 @@ void Compiler::optSetBlockWeights() if (!usingProfileWeights && firstBBDominatesAllReturns) { + // If the weight is already zero (and thus rarely run), there's no point scaling it. if (block->bbWeight != BB_ZERO_WEIGHT) { - // Calculate our bbWeight: - // - // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks - // o otherwise BB_UNITY_WEIGHT / 2 - // + // If the block dominates all return blocks, leave the weight alone. Otherwise, + // scale the weight by 0.5 as a heuristic that some other path gets some of the dynamic flow. + // Note that `optScaleLoopBlocks` has a similar heuristic for loop blocks that don't dominate + // their loop back edge. + bool blockDominatesAllReturns = true; // Assume that we will dominate for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next) @@ -99,6 +100,10 @@ void Compiler::optSetBlockWeights() if (block == fgFirstBB) { firstBBDominatesAllReturns = blockDominatesAllReturns; + + // Don't scale the weight of the first block, since it is guaranteed to execute. + // If the first block does not dominate all the returns, we won't scale any of the function's + // block weights. } else { @@ -108,6 +113,12 @@ void Compiler::optSetBlockWeights() if (!blockDominatesAllReturns) { INDEBUG(changed = true); + + // TODO-Cleanup: we should use: + // block->scaleBBWeight(0.5); + // since we are inheriting "from ourselves", but that leads to asm diffs due to minutely + // different floating-point value in the calculation, and some code that compares weights + // for equality. block->inheritWeightPercentage(block, 50); } } @@ -128,29 +139,25 @@ void Compiler::optSetBlockWeights() #endif } -/***************************************************************************** - * - * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop. - */ - -void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk) +//------------------------------------------------------------------------ +// optScaleLoopBlocks: Scale the weight of loop blocks from 'begBlk' to 'endBlk'. +// +// Arguments: +// begBlk - first block of range. Must be marked as a loop head (BBF_LOOP_HEAD). +// endBlk - last block of range (inclusive). Must be reachable from `begBlk`. +// +// Operation: +// Calculate the 'loop weight'. This is the amount to scale the weight of each block in the loop. +// Our heuristic is that loops are weighted eight times more than straight-line code +// (scale factor is BB_LOOP_WEIGHT_SCALE). If the loops are all properly formed this gives us these weights: +// +// 1 -- non-loop basic block +// 8 -- single loop nesting +// 64 -- double loop nesting +// 512 -- triple loop nesting +// +void Compiler::optScaleLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) { - /* Calculate the 'loopWeight', - this is the amount to increase each block in the loop - Our heuristic is that loops are weighted eight times more - than straight line code. - Thus we increase each block by 7 times the weight of - the loop header block, - if the loops are all properly formed gives us: - (assuming that BB_LOOP_WEIGHT_SCALE is 8) - - 1 -- non loop basic block - 8 -- single loop nesting - 64 -- double loop nesting - 512 -- triple loop nesting - - */ - noway_assert(begBlk->bbNum <= endBlk->bbNum); noway_assert(begBlk->isLoopHead()); noway_assert(fgReachable(begBlk, endBlk)); @@ -159,17 +166,16 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex #ifdef DEBUG if (verbose) { - printf("\nMarking a loop from " FMT_BB " to " FMT_BB, begBlk->bbNum, - excludeEndBlk ? endBlk->bbPrev->bbNum : endBlk->bbNum); + printf("\nMarking a loop from " FMT_BB " to " FMT_BB, begBlk->bbNum, endBlk->bbNum); } #endif - /* Build list of backedges for block begBlk */ + // Build list of back edges for block begBlk. flowList* backedgeList = nullptr; for (BasicBlock* const predBlock : begBlk->PredBlocks()) { - /* Is this a backedge? */ + // Is this a back edge? if (predBlock->bbNum >= begBlk->bbNum) { backedgeList = new (this, CMK_FlowList) flowList(predBlock, backedgeList); @@ -181,24 +187,41 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex } } - /* At least one backedge must have been found (the one from endBlk) */ + // At least one backedge must have been found (the one from endBlk). noway_assert(backedgeList); - BasicBlock* curBlk = begBlk; + auto reportBlockWeight = [&](BasicBlock* blk, const char* message) { +#ifdef DEBUG + if (verbose) + { + printf("\n " FMT_BB "(wt=" FMT_WT ")%s", blk->bbNum, blk->getBBWeight(this), message); + } +#endif // DEBUG + }; - while (true) + for (BasicBlock* const curBlk : BasicBlockRangeList(begBlk, endBlk)) { - noway_assert(curBlk); + // Don't change the block weight if it came from profile data. + if (curBlk->hasProfileWeight()) + { + reportBlockWeight(curBlk, "; unchanged: has profile weight"); + continue; + } - // For curBlk to be part of a loop that starts at begBlk - // curBlk must be reachable from begBlk and (since this is a loop) - // likewise begBlk must be reachable from curBlk. - // + // Don't change the block weight if it's known to be rarely run. + if (curBlk->isRunRarely()) + { + reportBlockWeight(curBlk, "; unchanged: run rarely"); + continue; + } + + // For curBlk to be part of a loop that starts at begBlk, curBlk must be reachable from begBlk and + // (since this is a loop) begBlk must likewise be reachable from curBlk. if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) { - /* If this block reaches any of the backedge blocks we set reachable */ - /* If this block dominates any of the backedge blocks we set dominates */ + // If `curBlk` reaches any of the back edge blocks we set `reachable`. + // If `curBlk` dominates any of the back edge blocks we set `dominates`. bool reachable = false; bool dominates = false; @@ -206,88 +229,75 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex { BasicBlock* backedge = tmp->getBlock(); - if (!curBlk->isRunRarely()) - { - reachable |= fgReachable(curBlk, backedge); - dominates |= fgDominate(curBlk, backedge); + reachable |= fgReachable(curBlk, backedge); + dominates |= fgDominate(curBlk, backedge); - if (dominates && reachable) - { - break; - } + if (dominates && reachable) + { + // No need to keep looking; we've already found all the info we need. + break; } } if (reachable) { + // If the block has BB_ZERO_WEIGHT, then it should be marked as rarely run, and skipped, above. noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT); - if (!curBlk->hasProfileWeight()) - { - weight_t scale = BB_LOOP_WEIGHT_SCALE; - - if (!dominates) - { - scale = scale / 2; - } + weight_t scale = BB_LOOP_WEIGHT_SCALE; - curBlk->scaleBBWeight(scale); + if (!dominates) + { + // If `curBlk` reaches but doesn't dominate any back edge to `endBlk` then there must be at least + // some other path to `endBlk`, so don't give `curBlk` all the execution weight. + scale = scale / 2; } - JITDUMP("\n " FMT_BB "(wt=" FMT_WT ")", curBlk->bbNum, curBlk->getBBWeight(this)); - } - } - - /* Stop if we've reached the last block in the loop */ + curBlk->scaleBBWeight(scale); - if (curBlk == endBlk) - { - break; + reportBlockWeight(curBlk, ""); + } + else + { + reportBlockWeight(curBlk, "; unchanged: back edge unreachable"); + } } - - curBlk = curBlk->bbNext; - - /* If we are excluding the endBlk then stop if we've reached endBlk */ - - if (excludeEndBlk && (curBlk == endBlk)) + else { - break; + reportBlockWeight(curBlk, "; unchanged: block not in loop"); } } } -/***************************************************************************** - * - * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop. - */ - +//------------------------------------------------------------------------ +// optUnmarkLoopBlocks: Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop. +// +// Arguments: +// begBlk - first block of range. Must be marked as a loop head (BBF_LOOP_HEAD). +// endBlk - last block of range (inclusive). Must be reachable from `begBlk`. +// +// Operation: +// A set of blocks that were previously marked as a loop are now to be unmarked, since we have decided that +// for some reason this loop no longer exists. Basically we are just resetting the blocks bbWeight to their +// previous values. +// void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) { - /* A set of blocks that were previously marked as a loop are now - to be unmarked, since we have decided that for some reason this - loop no longer exists. - Basically we are just reseting the blocks bbWeight to their - previous values. - */ - noway_assert(begBlk->bbNum <= endBlk->bbNum); noway_assert(begBlk->isLoopHead()); - noway_assert(!opts.MinOpts()); unsigned backEdgeCount = 0; for (BasicBlock* const predBlock : begBlk->PredBlocks()) { - /* is this a backward edge? (from predBlock to begBlk) */ - + // Is this a backward edge? (from predBlock to begBlk) if (begBlk->bbNum > predBlock->bbNum) { continue; } - /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ - + // We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops. if ((predBlock->bbJumpKind != BBJ_COND) && (predBlock->bbJumpKind != BBJ_ALWAYS)) { continue; @@ -296,7 +306,7 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) backEdgeCount++; } - /* Only unmark the loop blocks if we have exactly one loop back edge */ + // Only unmark the loop blocks if we have exactly one loop back edge. if (backEdgeCount != 1) { #ifdef DEBUG @@ -314,58 +324,56 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) #endif return; } - noway_assert(backEdgeCount == 1); noway_assert(fgReachable(begBlk, endBlk)); #ifdef DEBUG if (verbose) { - printf("\nUnmarking loop at " FMT_BB, begBlk->bbNum); + printf("\nUnmarking a loop from " FMT_BB " to " FMT_BB, begBlk->bbNum, endBlk->bbNum); } #endif - BasicBlock* curBlk = begBlk; - while (true) + for (BasicBlock* const curBlk : BasicBlockRangeList(begBlk, endBlk)) { - noway_assert(curBlk); - - // For curBlk to be part of a loop that starts at begBlk - // curBlk must be reachable from begBlk and (since this is a loop) - // likewise begBlk must be reachable from curBlk. - // - if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) + // Stop if we go past the last block in the loop, as it may have been deleted. + if (curBlk->bbNum > endBlk->bbNum) { - // Don't unmark blocks that are set to BB_MAX_WEIGHT - // Don't unmark blocks when we are using profile weights - // - if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight()) - { - weight_t scale = 1.0 / BB_LOOP_WEIGHT_SCALE; - - if (!fgDominate(curBlk, endBlk)) - { - scale *= 2; - } - - curBlk->scaleBBWeight(scale); - } - - JITDUMP("\n " FMT_BB "(wt=" FMT_WT ")", curBlk->bbNum, curBlk->getBBWeight(this)); + break; } - /* Stop if we've reached the last block in the loop */ + // Don't change the block weight if it's known to be rarely run. + if (curBlk->isRunRarely()) + { + continue; + } - if (curBlk == endBlk) + // Don't change the block weight if it came from profile data. + if (curBlk->hasProfileWeight()) { - break; + continue; } - curBlk = curBlk->bbNext; + // Don't unmark blocks that are maximum weight. + if (curBlk->isMaxBBWeight()) + { + continue; + } - /* Stop if we go past the last block in the loop, as it may have been deleted */ - if (curBlk->bbNum > endBlk->bbNum) + // For curBlk to be part of a loop that starts at begBlk, curBlk must be reachable from begBlk and + // (since this is a loop) begBlk must likewise be reachable from curBlk. + // + if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) { - break; + weight_t scale = 1.0 / BB_LOOP_WEIGHT_SCALE; + + if (!fgDominate(curBlk, endBlk)) + { + scale *= 2; + } + + curBlk->scaleBBWeight(scale); + + JITDUMP("\n " FMT_BB "(wt=" FMT_WT ")", curBlk->bbNum, curBlk->getBBWeight(this)); } } @@ -550,13 +558,18 @@ void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmar { printf("\nUpdateLoopsBeforeRemoveBlock After: "); optPrintLoopInfo(loopNum); + printf("\n"); } #endif } - if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && - (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed && - (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block)) + if ((skipUnmarkLoop == false) && // + ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && // + block->bbJumpDest->isLoopHead() && // + (block->bbJumpDest->bbNum <= block->bbNum) && // + fgDomsComputed && // + (fgCurBBEpochSize == fgDomBBcount + 1) && // + fgReachable(block->bbJumpDest, block)) { optUnmarkLoopBlocks(block->bbJumpDest, block); } @@ -571,7 +584,6 @@ void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmar void Compiler::optPrintLoopInfo(unsigned loopInd, BasicBlock* lpHead, - BasicBlock* lpFirst, BasicBlock* lpTop, BasicBlock* lpEntry, BasicBlock* lpBottom, @@ -579,16 +591,8 @@ void Compiler::optPrintLoopInfo(unsigned loopInd, BasicBlock* lpExit, unsigned parentLoop) const { - noway_assert(lpHead); - - printf(FMT_LP ", from " FMT_BB, loopInd, lpFirst->bbNum); - if (lpTop != lpFirst) - { - printf(" (loop top is " FMT_BB ")", lpTop->bbNum); - } - - printf(" to " FMT_BB " (Head=" FMT_BB ", Entry=" FMT_BB ", ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, - lpEntry->bbNum, lpExitCnt); + printf(FMT_LP ", from " FMT_BB " to " FMT_BB " (Head=" FMT_BB ", Entry=" FMT_BB ", ExitCnt=%d", loopInd, + lpTop->bbNum, lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum, lpExitCnt); if (lpExitCnt == 1) { @@ -609,15 +613,15 @@ void Compiler::optPrintLoopInfo(unsigned loopInd, void Compiler::optPrintLoopInfo(unsigned lnum) const { - noway_assert(lnum < optLoopCount); + assert(lnum < optLoopCount); - const LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table. + const LoopDsc* ldsc = &optLoopTable[lnum]; - optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt, - ldsc->lpExit, ldsc->lpParent); + optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt, ldsc->lpExit, + ldsc->lpParent); } -#endif +#endif // DEBUG //------------------------------------------------------------------------ // optPopulateInitInfo: Populate loop init info in the loop table. @@ -1035,13 +1039,8 @@ bool Compiler::optExtractInitTestIncr( * out of entries in loop table. */ -bool Compiler::optRecordLoop(BasicBlock* head, - BasicBlock* first, - BasicBlock* top, - BasicBlock* entry, - BasicBlock* bottom, - BasicBlock* exit, - unsigned char exitCnt) +bool Compiler::optRecordLoop( + BasicBlock* head, BasicBlock* top, BasicBlock* entry, BasicBlock* bottom, BasicBlock* exit, unsigned char exitCnt) { // Record this loop in the table, if there's room. @@ -1055,7 +1054,6 @@ bool Compiler::optRecordLoop(BasicBlock* head, } // Assumed preconditions on the loop we're adding. - assert(first->bbNum <= top->bbNum); assert(top->bbNum <= entry->bbNum); assert(entry->bbNum <= bottom->bbNum); assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum); @@ -1073,7 +1071,7 @@ bool Compiler::optRecordLoop(BasicBlock* head, for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--) { unsigned char prev = prevPlus1 - 1; - if (optLoopTable[prev].lpContainedBy(first, bottom)) + if (optLoopTable[prev].lpContainedBy(top, bottom)) { loopInd = prev; } @@ -1091,17 +1089,16 @@ bool Compiler::optRecordLoop(BasicBlock* head, // The loop is well-formed. assert(optLoopTable[i].lpWellFormed()); // Check for disjoint. - if (optLoopTable[i].lpDisjoint(first, bottom)) + if (optLoopTable[i].lpDisjoint(top, bottom)) { continue; } // Otherwise, assert complete containment (of optLoopTable[i] in new loop). - assert(optLoopTable[i].lpContainedBy(first, bottom)); + assert(optLoopTable[i].lpContainedBy(top, bottom)); } #endif // DEBUG optLoopTable[loopInd].lpHead = head; - optLoopTable[loopInd].lpFirst = first; optLoopTable[loopInd].lpTop = top; optLoopTable[loopInd].lpBottom = bottom; optLoopTable[loopInd].lpEntry = entry; @@ -1257,7 +1254,7 @@ void Compiler::optPrintLoopRecording(unsigned loopInd) const printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : "")); optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added. - loop.lpHead, loop.lpFirst, loop.lpTop, loop.lpEntry, loop.lpBottom, loop.lpExitCnt, loop.lpExit); + loop.lpHead, loop.lpTop, loop.lpEntry, loop.lpBottom, loop.lpExitCnt, loop.lpExit); // If an iterator loop print the iterator and the initialization. if (loop.lpFlags & LPFLG_ITER) @@ -1341,14 +1338,13 @@ namespace { //------------------------------------------------------------------------ // LoopSearch: Class that handles scanning a range of blocks to detect a loop, -// moving blocks to make the loop body contiguous, and recording -// the loop. +// moving blocks to make the loop body contiguous, and recording the loop. // // We will use the following terminology: // HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry). // Not part of the looping of the loop. -// FIRST - the lexically first basic block (in bbNext order) within this loop. -// TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same. +// TOP - the target of the backward edge from BOTTOM, and the lexically first basic block (in bbNext order) +// within this loop. // BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top) // EXIT - the predecessor of loop's unique exit edge, if it has a unique exit edge; else nullptr // ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry @@ -1359,14 +1355,14 @@ namespace // between TOP and BOTTOM as part of the loop even if they aren't part of the SCC. // Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges // from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the -// same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write -// the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore. +// same TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write +// the CFG so that no two loops share the same TOP/ENTRY anymore. // // | // v // head // | -// | top/first <--+ +// | top <--+ // | | | // | ... | // | | | @@ -1450,7 +1446,10 @@ class LoopSearch { return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); } - return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum); + else + { + return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum); + } } void Insert(unsigned int blockNum) @@ -1493,7 +1492,6 @@ class LoopSearch // See LoopSearch class comment header for a diagram relating these fields: BasicBlock* head; // Predecessor of unique entry edge - BasicBlock* first; // Lexically first in-loop block BasicBlock* top; // Successor of back-edge from BOTTOM BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block BasicBlock* entry; // Successor of unique entry edge @@ -1524,12 +1522,12 @@ class LoopSearch // bool RecordLoop() { - /* At this point we have a compact loop - record it in the loop table - * If we found only one exit, record it in the table too - * (otherwise an exit = nullptr in the loop table means multiple exits) */ + // At this point we have a compact loop - record it in the loop table. + // If we found only one exit, record it in the table too + // (otherwise an exit = nullptr in the loop table means multiple exits). BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr); - if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount)) + if (comp->optRecordLoop(head, top, entry, bottom, onlyExit, exitCount)) { // Record the BOTTOM block for future reference before returning. assert(bottom->bbNum <= oldBlockMaxNum); @@ -1573,12 +1571,11 @@ class LoopSearch // bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) { - /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM - * to TOP (note that this is an abuse of notation since this is not necessarily a back edge - * as the definition says, but merely an indication that we have a loop there). - * Thus, we have to be very careful and after entry discovery check that it is indeed - * the only place we enter the loop (especially for non-reducible flow graphs). - */ + // Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM + // to TOP (note that this is an abuse of notation since this is not necessarily a back edge + // as the definition says, but merely an indication that we have a loop there). + // Thus, we have to be very careful and after entry discovery check that it is indeed + // the only place we enter the loop (especially for non-reducible flow graphs). if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP) { @@ -1597,21 +1594,20 @@ class LoopSearch (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) || (bottom->bbJumpKind == BBJ_SWITCH)) { - /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. - * BBJ_SWITCH that has a backward jump appears only for labeled break. */ + // BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. + // BBJ_SWITCH that has a backward jump appears only for labeled break. return false; } - /* The presence of a "back edge" is an indication that a loop might be present here - * - * LOOP: - * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any - * node in the loop to any other node in the loop (wholly within the loop) - * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node - * in the loop from outside the loop, and that is through the ENTRY - */ + // The presence of a "back edge" is an indication that a loop might be present here. + // + // Definition: A loop is: + // 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any + // node in the loop to any other node in the loop (wholly within the loop) + // 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node + // in the loop from outside the loop, and that is through the ENTRY - /* Let's find the loop ENTRY */ + // Let's find the loop ENTRY BasicBlock* entry = FindEntry(head, top, bottom); if (entry == nullptr) @@ -1628,10 +1624,6 @@ class LoopSearch this->lastExit = nullptr; this->exitCount = 0; - // Now we find the "first" block -- the earliest block reachable within the loop. - // With our current algorithm, this is always the same as "top". - this->first = top; - if (!HasSingleEntryCycle()) { // There isn't actually a loop between TOP and BOTTOM @@ -1662,27 +1654,25 @@ class LoopSearch // // Here, BB10 is more nested than BB02. - if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first)) + if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), top)) { - JITDUMP("Loop 'first' " FMT_BB " is in an outer EH region compared to loop 'bottom' " FMT_BB ". Rejecting " + JITDUMP("Loop 'top' " FMT_BB " is in an outer EH region compared to loop 'bottom' " FMT_BB ". Rejecting " "loop.\n", - first->bbNum, bottom->bbNum); + top->bbNum, bottom->bbNum); return false; } #if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM) // Disqualify loops where the first block of the loop is a finally target. - // The main problem is when multiple loops share a 'first' block that is a finally + // The main problem is when multiple loops share a 'top' block that is a finally // target and we canonicalize the loops by adding a new loop head. In that case, we // need to update the blocks so the finally target bit is moved to the newly created - // block, and removed from the old 'first' block. This is 'hard', so at this point - // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator - // long-term), it's easier to disallow the loop than to update the flow graph to - // support this case. + // block, and removed from the old 'top' block. This is 'hard', so it's easier to disallow + // the loop than to update the flow graph to support this case. - if ((first->bbFlags & BBF_FINALLY_TARGET) != 0) + if ((top->bbFlags & BBF_FINALLY_TARGET) != 0) { - JITDUMP("Loop 'first' " FMT_BB " is a finally target. Rejecting loop.\n", first->bbNum); + JITDUMP("Loop 'top' " FMT_BB " is a finally target. Rejecting loop.\n", top->bbNum); return false; } #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM) @@ -1729,18 +1719,16 @@ class LoopSearch { if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum) { - /* OK - we enter somewhere within the loop */ - - /* some useful asserts - * Cannot enter at the top - should have being caught by redundant jumps */ + // OK - we enter somewhere within the loop. + // Cannot enter at the top - should have being caught by redundant jumps assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS)); return head->bbJumpDest; } else { - /* special case - don't consider now */ + // special case - don't consider now // assert (!"Loop entered in weird way!"); return nullptr; } @@ -1748,12 +1736,12 @@ class LoopSearch // Can we fall through into the loop? else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND) { - /* The ENTRY is at the TOP (a do-while loop) */ + // The ENTRY is at the TOP (a do-while loop) return top; } else { - return nullptr; // head does not flow into the loop bail for now + return nullptr; // HEAD does not flow into the loop; bail for now } } @@ -1767,8 +1755,7 @@ class LoopSearch // false - Did not find a single-entry cycle. // // Notes: - // Will mark (in `loopBlocks`) all blocks found to participate in the - // cycle. + // Will mark (in `loopBlocks`) all blocks found to participate in the cycle. // bool HasSingleEntryCycle() { @@ -1785,9 +1772,7 @@ class LoopSearch BasicBlock* block = worklist.back(); worklist.pop_back(); - /* Make sure ENTRY dominates all blocks in the loop - * This is necessary to ensure condition 2. above - */ + // Make sure ENTRY dominates all blocks in the loop. if (block->bbNum > oldBlockMaxNum) { // This is a new block we added to connect fall-through, so the @@ -2212,7 +2197,7 @@ class LoopSearch if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext)) { - /* Reverse the jump condition */ + // Reverse the jump condition GenTree* test = block->lastNode(); noway_assert(test->OperIsConditionalJump()); @@ -2280,7 +2265,7 @@ class LoopSearch if (!loopBlocks.IsMember(exitPoint->bbNum)) { - /* exit from a block other than BOTTOM */ + // Exit from a block other than BOTTOM lastExit = block; exitCount++; } @@ -2291,13 +2276,13 @@ class LoopSearch case BBJ_EHFINALLYRET: case BBJ_EHFILTERRET: - /* The "try" associated with this "finally" must be in the - * same loop, so the finally block will return control inside the loop */ + // The "try" associated with this "finally" must be in the same loop, so the + // finally block will return control inside the loop. break; case BBJ_THROW: case BBJ_RETURN: - /* those are exits from the loop */ + // Those are exits from the loop lastExit = block; exitCount++; break; @@ -2326,14 +2311,17 @@ class LoopSearch } } }; -} - -/***************************************************************************** - * Find the natural loops, using dominators. Note that the test for - * a loop is slightly different from the standard one, because we have - * not done a depth first reordering of the basic blocks. - */ +} // end (anonymous) namespace +//------------------------------------------------------------------------ +// optFindNaturalLoops: Find the natural loops, using dominators. Note that the test for +// a loop is slightly different from the standard one, because we have not done a depth +// first reordering of the basic blocks. +// +// See LoopSearch class comment header for a description of the loops found. +// +// We will find and record a maximum of BasicBlock::MAX_LOOP_NUM loops (currently 64). +// void Compiler::optFindNaturalLoops() { #ifdef DEBUG @@ -2358,9 +2346,7 @@ void Compiler::optFindNaturalLoops() { BasicBlock* top = head->bbNext; - // Blocks that are rarely run have a zero bbWeight and should - // never be optimized here - + // Blocks that are rarely run have a zero bbWeight and should never be optimized here. if (top->bbWeight == BB_ZERO_WEIGHT) { continue; @@ -2378,16 +2364,16 @@ void Compiler::optFindNaturalLoops() #if COUNT_LOOPS if (!hasMethodLoops) { - /* mark the method as containing natural loops */ + // Mark the method as containing natural loops totalLoopMethods++; hasMethodLoops = true; } - /* increment total number of loops found */ + // Increment total number of loops found totalLoopCount++; loopsThisMethod++; - /* keep track of the number of exits */ + // Keep track of the number of exits loopExitCountTable.record(static_cast(search.GetExitCount())); // Note that we continue to look for loops even if @@ -2437,13 +2423,13 @@ void Compiler::optFindNaturalLoops() { // Need to renumber blocks now since loop canonicalization // depends on it; can defer the rest of fgUpdateChangedFlowGraph() - // until after canonicalizing loops. Dominator information is + // until after canonicalizing loops. Dominator information is // recorded in terms of block numbers, so flag it invalid. fgDomsComputed = false; fgRenumberBlocks(); } - // Now the loop indices are stable. We can figure out parent/child relationships + // Now the loop indices are stable. We can figure out parent/child relationships // (using table indices to name loops), and label blocks. for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++) { @@ -2460,9 +2446,10 @@ void Compiler::optFindNaturalLoops() } } - // Now label the blocks with the innermost loop to which they belong. Since parents + // Now label the blocks with the innermost loop to which they belong. Since parents // precede children in the table, doing the labeling for each loop in order will achieve - // this -- the innermost loop labeling will be done last. + // this -- the innermost loop labeling will be done last. (Inner loop blocks will be + // labeled multiple times before being correct at the end.) for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) { for (BasicBlock* const blk : optLoopTable[loopInd].LoopBlocks()) @@ -2494,7 +2481,7 @@ void Compiler::optFindNaturalLoops() } #ifdef DEBUG - if (verbose && optLoopCount > 0) + if (verbose && (optLoopCount > 0)) { printf("\nFinal natural loop table:\n"); for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++) @@ -2506,34 +2493,37 @@ void Compiler::optFindNaturalLoops() #endif // DEBUG } -//----------------------------------------------------------------------------- +//------------------------------------------------------------------------ +// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment. // -// All the inner loops that whose block weight meets a threshold are marked -// as needing alignment. +// All innermost loops whose block weight meets a threshold are candidates for alignment. +// The `first` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this +// (the loop table itself is not changed). +// +// Depends on the loop table, and on block weights being set. // - void Compiler::optIdentifyLoopsForAlignment() { #if FEATURE_LOOP_ALIGN if (codeGen->ShouldAlignLoops()) { - for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) + for (BasicBlock::loopNumber loopInd = 0; loopInd < optLoopCount; loopInd++) { - BasicBlock* first = optLoopTable[loopInd].lpFirst; - // An innerloop candidate that might need alignment if (optLoopTable[loopInd].lpChild == BasicBlock::NOT_IN_LOOP) { - if (first->getBBWeight(this) >= (opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT)) + BasicBlock* top = optLoopTable[loopInd].lpTop; + weight_t topWeight = top->getBBWeight(this); + if (topWeight >= (opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT)) { - first->bbFlags |= BBF_LOOP_ALIGN; + top->bbFlags |= BBF_LOOP_ALIGN; JITDUMP(FMT_LP " that starts at " FMT_BB " needs alignment, weight=" FMT_WT ".\n", loopInd, - first->bbNum, first->getBBWeight(this)); + top->bbNum, topWeight); } else { JITDUMP("Skip alignment for " FMT_LP " that starts at " FMT_BB " weight=" FMT_WT ".\n", loopInd, - first->bbNum, first->getBBWeight(this)); + top->bbNum, topWeight); } } } @@ -2668,7 +2658,7 @@ bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) { bool modified = false; - // Is the top of the current loop not in any nested loop? + // Is the top of the current loop in any nested loop? if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) { if (optCanonicalizeLoop(loopInd)) @@ -2677,8 +2667,9 @@ bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) } } - for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; - child = optLoopTable[child].lpSibling) + for (unsigned char child = optLoopTable[loopInd].lpChild; // + child != BasicBlock::NOT_IN_LOOP; // + child = optLoopTable[child].lpSibling) { if (optCanonicalizeLoopNest(child)) { @@ -2706,7 +2697,7 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) // Otherwise, the top of this loop is also part of a nested loop. // // Insert a new unique top for this loop. We must be careful to put this new - // block in the correct EH region. Note that f->bbPrev might be in a different + // block in the correct EH region. Note that t->bbPrev might be in a different // EH region. For example: // // try { @@ -2781,16 +2772,15 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) // simplify things, we disqualify this type of loop, so we should never see this here. BasicBlock* h = optLoopTable[loopInd].lpHead; - BasicBlock* f = optLoopTable[loopInd].lpFirst; BasicBlock* b = optLoopTable[loopInd].lpBottom; // The loop must be entirely contained within a single handler region. - assert(BasicBlock::sameHndRegion(f, b)); + assert(BasicBlock::sameHndRegion(t, b)); // If the bottom block is in the same "try" region, then we extend the EH // region. Otherwise, we add the new block outside the "try" region. - bool extendRegion = BasicBlock::sameTryRegion(f, b); - BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion); + const bool extendRegion = BasicBlock::sameTryRegion(t, b); + BasicBlock* newT = fgNewBBbefore(BBJ_NONE, t, extendRegion); if (!extendRegion) { // We need to set the EH region manually. Set it to be the same @@ -2804,7 +2794,7 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway. // Redirect the "bottom" of the current loop to "newT". - BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist()); + BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopOpt)) BlockToBlockMap(getAllocator(CMK_LoopOpt)); blockMap->Set(t, newT); optRedirectBlock(b, blockMap); @@ -2861,14 +2851,7 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) } } - assert(newT->bbNext == f); - if (f != t) - { - newT->bbJumpKind = BBJ_ALWAYS; - newT->bbJumpDest = t; - newT->bbStmtList = nullptr; - fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); - } + assert(newT->bbNext == t); // If it had been a do-while loop (top == entry), update entry, as well. BasicBlock* origE = optLoopTable[loopInd].lpEntry; @@ -2876,8 +2859,7 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) { optLoopTable[loopInd].lpEntry = newT; } - optLoopTable[loopInd].lpTop = newT; - optLoopTable[loopInd].lpFirst = newT; + optLoopTable[loopInd].lpTop = newT; newT->bbNatLoopNum = loopInd; @@ -2902,8 +2884,9 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) // If any loops nested in "loopInd" have the same head and entry as "loopInd", // it must be the case that they were do-while's (since "h" fell through to the entry). // The new node "newT" becomes the head of such loops. - for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; - childLoop = optLoopTable[childLoop].lpSibling) + for (unsigned char childLoop = optLoopTable[loopInd].lpChild; // + childLoop != BasicBlock::NOT_IN_LOOP; // + childLoop = optLoopTable[childLoop].lpSibling) { if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h && newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE) @@ -4631,16 +4614,77 @@ PhaseStatus Compiler::optOptimizeLayout() return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } +//------------------------------------------------------------------------ +// optMarkLoopHeads: Mark all potential loop heads as BBF_LOOP_HEAD. A potential loop head is a block +// targeted by a lexical back edge, where the source of the back edge is reachable from the block. +// Note that if there are no lexical back edges, there can't be any loops. +// +// If there are any potential loop heads, set `fgHasLoops` to `true`. +// +// Assumptions: +// The reachability sets must be computed and valid. +// +void Compiler::optMarkLoopHeads() +{ + assert(!fgCheapPredsValid); + assert(fgReachabilitySetsValid); + +#ifdef DEBUG + if (verbose) + { + printf("*************** In optMarkLoopHeads()\n"); + } +#endif + + bool hasLoops = false; + + for (BasicBlock* const block : Blocks()) + { + // Set BBF_LOOP_HEAD if we have backwards branches to this block. + + unsigned blockNum = block->bbNum; + for (BasicBlock* const predBlock : block->PredBlocks()) + { + if (blockNum <= predBlock->bbNum) + { + if (predBlock->bbJumpKind == BBJ_CALLFINALLY) + { + // Loops never have BBJ_CALLFINALLY as the source of their "back edge". + continue; + } + + // If block can reach predBlock then we have a loop head + if (BlockSetOps::IsMember(this, predBlock->bbReach, blockNum)) + { + hasLoops = true; + block->bbFlags |= BBF_LOOP_HEAD; + break; // No need to look at more `block` predecessors + } + } + } + } + + fgHasLoops = hasLoops; +} + //----------------------------------------------------------------------------- -// optFindLoops: find and classify natural loops +// optFindLoops: find loops in the function. +// +// The JIT recognizes two types of loops in a function: natural loops and "general" (or "unnatural") loops. +// Natural loops are those which get added to the loop table. Most downstream optimizations require +// using natural loops. See `optFindNaturalLoops` for a definition of the criteria for recognizing a natural loop. +// A general loop is defined as a lexical (program order) range of blocks where a later block branches to an +// earlier block (that is, there is a back edge in the flow graph), and the later block is reachable from the earlier +// block. General loops are used for weighting flow graph blocks (when there is no block profile data), as well as +// for determining if we require fully interruptible GC information. // // Notes: -// Also (re)sets all non-IBC block weights, and marks loops potentially needing -// alignment padding. +// Also (re)sets all non-IBC block weights, and marks loops potentially needing alignment padding. // PhaseStatus Compiler::optFindLoops() { noway_assert(opts.OptimizationEnabled()); + assert(fgDomsComputed); #ifdef DEBUG if (verbose) @@ -4649,50 +4693,46 @@ PhaseStatus Compiler::optFindLoops() } #endif + optMarkLoopHeads(); + optSetBlockWeights(); /* Were there any loops in the flow graph? */ if (fgHasLoops) { - /* now that we have dominator information we can find loops */ - optFindNaturalLoops(); - unsigned loopNum = 0; + // Now find the general loops and scale block weights. - /* Iterate over the flow graph, marking all loops */ + unsigned generalLoopCount = 0; - /* We will use the following terminology: - * top - the first basic block in the loop (i.e. the head of the backward edge) - * bottom - the last block in the loop (i.e. the block from which we jump to the top) - * lastBottom - used when we have multiple back-edges to the same top - */ + // We will use the following terminology: + // top - the first basic block in the loop (i.e. the head of the backward edge) + // bottom - the last block in the loop (i.e. the block from which we jump to the top) + // lastBottom - used when we have multiple back edges to the same top for (BasicBlock* const top : Blocks()) { + // Only consider `top` blocks already determined to be potential loop heads. + if (!top->isLoopHead()) + { + continue; + } + BasicBlock* foundBottom = nullptr; for (BasicBlock* const bottom : top->PredBlocks()) { - /* Is this a loop candidate? - We look for "back edges" */ - - /* is this a backward edge? (from BOTTOM to TOP) */ + // Is this a loop candidate? - We look for "back edges" + // Is this a backward edge? (from BOTTOM to TOP) if (top->bbNum > bottom->bbNum) { continue; } - /* 'top' also must have the BBF_LOOP_HEAD flag set */ - - if (top->isLoopHead() == false) - { - continue; - } - - /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ - + // We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops. if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS)) { continue; @@ -4714,15 +4754,15 @@ PhaseStatus Compiler::optFindLoops() if (foundBottom) { - loopNum++; + generalLoopCount++; /* Mark all blocks between 'top' and 'bottom' */ - optMarkLoopBlocks(top, foundBottom, false); + optScaleLoopBlocks(top, foundBottom); } // We track at most 255 loops - if (loopNum == 255) + if (generalLoopCount == 255) { #if COUNT_LOOPS totalUnnatLoopOverflows++; @@ -4731,32 +4771,21 @@ PhaseStatus Compiler::optFindLoops() } } - // Check if any of the loops need alignment - - JITDUMP("\n"); - optIdentifyLoopsForAlignment(); + JITDUMP("\nFound a total of %d general loops.\n", generalLoopCount); #if COUNT_LOOPS - totalUnnatLoopCount += loopNum; + totalUnnatLoopCount += generalLoopCount; #endif -#ifdef DEBUG - if (verbose) - { - if (loopNum > 0) - { - printf("\nFound a total of %d loops.", loopNum); - printf("\nAfter loop weight marking:\n"); - fgDispBasicBlocks(); - printf("\n"); - } - } + // Check if any of the loops need alignment + optIdentifyLoopsForAlignment(); + } - fgDebugCheckLoopTable(); +#ifdef DEBUG + fgDebugCheckLoopTable(); #endif - optLoopsMarked = true; - } + optLoopsMarked = true; return PhaseStatus::MODIFIED_EVERYTHING; } @@ -5486,8 +5515,8 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign { printf("\nHoisting a copy of "); printTreeID(origExpr); - printf(" into PreHeader for loop " FMT_LP " <" FMT_BB ".." FMT_BB ">:\n", lnum, - optLoopTable[lnum].lpFirst->bbNum, optLoopTable[lnum].lpBottom->bbNum); + printf(" into PreHeader for loop " FMT_LP " <" FMT_BB ".." FMT_BB ">:\n", lnum, optLoopTable[lnum].lpTop->bbNum, + optLoopTable[lnum].lpBottom->bbNum); gtDispTree(origExpr); printf("\n"); } @@ -6756,7 +6785,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo JITDUMP("\n optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop " FMT_LP " <" FMT_BB ".." FMT_BB ">, firstBlock is %s\n", - block->bbNum, refCntWtd2str(blockWeight), loopNum, loopDsc->lpFirst->bbNum, loopDsc->lpBottom->bbNum, + block->bbNum, refCntWtd2str(blockWeight), loopNum, loopDsc->lpTop->bbNum, loopDsc->lpBottom->bbNum, dspBool(block == loopDsc->lpEntry)); if (blockWeight < (BB_UNITY_WEIGHT / 10)) @@ -7587,10 +7616,10 @@ void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) // alignment if (optLoopTable[lnum].lpChild == BasicBlock::NOT_IN_LOOP) { - BasicBlock* first = optLoopTable[lnum].lpFirst; - first->bbFlags &= ~BBF_LOOP_ALIGN; + BasicBlock* top = optLoopTable[lnum].lpTop; + top->bbFlags &= ~BBF_LOOP_ALIGN; JITDUMP("Removing LOOP_ALIGN flag for " FMT_LP " that starts at " FMT_BB " because loop has a call.\n", lnum, - first->bbNum); + top->bbNum); } #endif