From cb7f42f2fa03ed14f05c2310e96642579f6e3cf4 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 4 Apr 2023 18:52:06 -0700 Subject: [PATCH] Update comments --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgopt.cpp | 10 +++++++--- src/coreclr/jit/loopcloning.cpp | 4 ++-- src/coreclr/jit/optimizer.cpp | 33 ++++++++++++++++++++++++++++----- 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6fd8e1442f7d18..b227a344410f72 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6511,7 +6511,7 @@ class Compiler public: LoopDsc* optLoopTable; // loop descriptor table bool optLoopTableValid; // info in loop table should be valid - bool optLoopsRequirePreHeaders; // Do we require that all loops have pre-headers? + bool optLoopsRequirePreHeaders; // Do we require that all loops (in the loop table) have pre-headers? unsigned char optLoopCount; // number of tracked loops unsigned char loopAlignCandidates; // number of loops identified for alignment diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index ddfa21b49cbb9a..84e6ce0ef21d76 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1937,8 +1937,8 @@ bool Compiler::fgCanCompactBlocks(BasicBlock* block, BasicBlock* bNext) } // Don't allow removing an empty loop pre-header. - // We can compact a pre-header into an empty `block`, as long as we propagate the BBF_LOOP_PREHEADER - // bit properly. + // We can compact a pre-header `bNext` into an empty `block` since BBF_COMPACT_UPD propagates + // BBF_LOOP_PREHEADER to `block`. if (optLoopsRequirePreHeaders) { if (((block->bbFlags & BBF_LOOP_PREHEADER) != 0) && (bNext->countOfInEdges() != 1)) @@ -2056,7 +2056,11 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext) JITDUMP("Second block has %u other incoming edges\n", bNext->countOfInEdges()); assert(block->isEmpty()); - // `block` can no longer be a loop pre-header (if it was before). + // When loops require pre-headers, `block` cannot be a pre-header. + // We should have screened this out in fgCanCompactBlocks(). + // + // When pre-headers are not required, then if `block` was a pre-header, + // it no longer is. // assert(!optLoopsRequirePreHeaders || ((block->bbFlags & BBF_LOOP_PREHEADER) == 0)); block->bbFlags &= ~BBF_LOOP_PREHEADER; diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 681df8e88394fa..0f28c613470919 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2148,8 +2148,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) } #endif - // If the loop we're cloning contains a nested loop, we need to clear the pre-header bit on - // the nested loop pre-header block, since it will no longer be a loop pre-header. (This is because + // If the loop we're cloning contains nested loops, we need to clear the pre-header bit on + // any nested loop pre-header blocks, since they will no longer be loop pre-headers. (This is because // we don't add the slow loop or its child loops to the loop table. It would be simplest to // just re-build the loop table if we want to enable loop optimization of the slow path loops.) if ((newBlk->bbFlags & BBF_LOOP_PREHEADER) != 0) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 1939447014dc3c..96790a373513c8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2718,7 +2718,7 @@ void Compiler::optFindNaturalLoops() fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } - // Create loop pre-header for every loop. + // Create a loop pre-header for every loop. bool modForPreHeader = false; for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++) { @@ -2729,7 +2729,6 @@ void Compiler::optFindNaturalLoops() } if (modForPreHeader) { - // The predecessors were maintained in fgCreateLoopPreHeader; don't rebuild them. fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } @@ -4600,8 +4599,6 @@ PhaseStatus Compiler::optUnrollLoops() // // If the initBlock is a BBJ_COND drop the condition (and make initBlock a BBJ_NONE block). // - // Also, make sure HEAD is a BBJ_NONE block. - // if (initBlock->bbJumpKind == BBJ_COND) { assert(dupCond); @@ -6952,7 +6949,7 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) ArrayStack defExec(getAllocatorLoopHoist()); // Add the pre-headers of any child loops to the list of blocks to consider for hoisting. - // Note that these are not definitely executed. However, it is a heuristic that they will + // Note that these are not necessarily definitely executed. However, it is a heuristic that they will // often provide good opportunities for further hoisting since we hoist from inside-out, // and the inner loop may have already hoisted something loop-invariant to them. If the child // loop pre-header block would be added anyway (by dominating the loop exit block), we don't @@ -6962,6 +6959,32 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) // assumed that the order does not matter for correctness (since there is no execution order known). // Note that the order does matter for the hoisting profitability heuristics, as we might // run out of hoisting budget when processing the blocks. + // + // For example, consider this loop nest: + // + // for (....) { // loop L00 + // pre-header 1 + // for (...) { // loop L01 + // } + // // pre-header 2 + // for (...) { // loop L02 + // // pre-header 3 + // for (...) { // loop L03 + // } + // } + // } + // + // When processing the outer loop L00 (with an assumed single exit), we will push on the defExec stack + // pre-header 2, pre-header 1, the loop exit block, any IDom tree blocks leading to the entry block, + // and finally the entry block. (Note that the child loop iteration order of a loop is from "farthest" + // from the loop "head" to "nearest".) Blocks are considered for hoisting in the opposite order. + // + // Note that pre-header 3 is not pushed, since it is not a direct child. It would have been processed + // when loop L02 was considered for hoisting. + // + // The order of pushing pre-header 1 and pre-header 2 is based on the order in the loop table (which is + // convenient). But note that it is arbitrary because there is not guaranteed execution order amongst + // the child loops. int childLoopPreHeaders = 0; for (BasicBlock::loopNumber childLoop = pLoopDsc->lpChild; //