Skip to content

Commit

Permalink
Comments and cleanup for loop cloning (#49768)
Browse files Browse the repository at this point in the history
* Comments and cleanup for loop cloning

This is a no-diff change

Added various comments to document loop cloning.

Standardized and improved some logging.

Consolidated more loop cloning condition checking into
`optIsLoopClonable` that was previously in `optIdentifyLoopOptInfo`.

Replaced some `0` weights with `BB_ZERO_WEIGHT`.

Made FMT_BB use pervasive.

* Review feedback

Added FMT_LP formatting string.

Cached often-used `optLoopTable[loopInd]` expression.

Added `const` to many loop query member functions.

Added static `GenTree::OperIs(compareOper, oper, oper, oper...)`
functions for simplifying oper check expressions where the
compareOper isn't from a GenTree node `OperGet()`. (This doesn't
need to be part of GenTree, but it doesn't hurt, either.)

Added a few more comments.

* Fix static OperIs

Rename to StaticOperIs. It appears the compiler uses the wrong
template in some cases, but doesn't complain about duplicate
options, leading to run-time failures.
  • Loading branch information
BruceForstall authored Mar 23, 2021
1 parent 3c1f142 commit cb88894
Show file tree
Hide file tree
Showing 20 changed files with 634 additions and 426 deletions.
5 changes: 4 additions & 1 deletion src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ typedef BitVec_ValRet_T ASSERT_VALRET_TP;
// This define is used with string concatenation to put this in printf format strings (Note that %u means unsigned int)
#define FMT_BB "BB%02u"

// Use this format for loop table indices.
#define FMT_LP "L%02u"

// And this format for profile weights
#define FMT_WT "%.7g"

Expand Down Expand Up @@ -616,7 +619,7 @@ struct BasicBlock : private LIR::Range
this->bbFlags &= ~BBF_PROF_WEIGHT;
}

if (this->bbWeight == 0)
if (this->bbWeight == BB_ZERO_WEIGHT)
{
this->bbFlags |= BBF_RUN_RARELY;
}
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,9 @@ void CodeGen::genCodeForBBlist()
if ((block->bbPrev != nullptr) && (block->bbPrev->bbJumpKind == BBJ_COND) &&
(block->bbWeight != block->bbPrev->bbWeight))
{
JITDUMP("Adding label due to BB weight difference: BBJ_COND " FMT_BB " with weight " FMT_WT
" different from " FMT_BB " with weight " FMT_WT "\n",
block->bbPrev->bbNum, block->bbPrev->bbWeight, block->bbNum, block->bbWeight);
needLabel = true;
}

Expand Down Expand Up @@ -588,7 +591,7 @@ void CodeGen::genCodeForBBlist()
{
if (!foundMismatchedRegVar)
{
JITDUMP("Mismatched live reg vars after BB%02u:", block->bbNum);
JITDUMP("Mismatched live reg vars after " FMT_BB ":", block->bbNum);
foundMismatchedRegVar = true;
}
JITDUMP(" V%02u", compiler->lvaTrackedIndexToLclNum(mismatchLiveVarIndex));
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4870,7 +4870,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoops);

// Clone loops with optimization opportunities, and
// choose the one based on dynamic condition evaluation.
// choose one based on dynamic condition evaluation.
//
DoPhase(this, PHASE_CLONE_LOOPS, &Compiler::optCloneLoops);

Expand Down
86 changes: 41 additions & 45 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6193,20 +6193,10 @@ class Compiler
PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method
PhaseStatus optFindLoops(); // Finds loops and records them in the loop table

// Optionally clone loops in the loop table.
void optCloneLoops();

// Clone loop "loopInd" in the loop table.
void optCloneLoop(unsigned loopInd, LoopCloneContext* context);

// Ensure that loop "loopInd" has a unique head block. (If the existing entry has
// non-loop predecessors other than the head entry, create a new, empty block that goes (only) to the entry,
// and redirects the preds of the entry to this new block.) Sets the weight of the newly created block to
// "ambientWeight".
void optEnsureUniqueHead(unsigned loopInd, BasicBlock::weight_t ambientWeight);

void optUnrollLoops(); // Unrolls loops (needs to have cost info)

void optRemoveRedundantZeroInits();

protected:
Expand Down Expand Up @@ -6320,83 +6310,88 @@ class Compiler

/* The following values are set only for iterator loops, i.e. has the flag LPFLG_ITER set */

GenTree* lpIterTree; // The "i = i <op> const" tree
unsigned lpIterVar(); // iterator variable #
int lpIterConst(); // the constant with which the iterator is incremented
genTreeOps lpIterOper(); // the type of the operation on the iterator (ASG_ADD, ASG_SUB, etc.)
void VERIFY_lpIterTree();
GenTree* lpIterTree; // The "i = i <op> const" tree
unsigned lpIterVar() const; // iterator variable #
int lpIterConst() const; // the constant with which the iterator is incremented
genTreeOps lpIterOper() const; // the type of the operation on the iterator (ASG_ADD, ASG_SUB, etc.)
void VERIFY_lpIterTree() const;

var_types lpIterOperType(); // For overflow instructions
var_types lpIterOperType() const; // For overflow instructions

union {
int lpConstInit; // initial constant value of iterator : Valid if LPFLG_CONST_INIT
unsigned lpVarInit; // initial local var number to which we initialize the iterator : Valid if
// LPFLG_VAR_INIT
};

/* The following is for LPFLG_ITER loops only (i.e. the loop condition is "i RELOP const or var" */
// The following is for LPFLG_ITER loops only (i.e. the loop condition is "i RELOP const or var"

GenTree* lpTestTree; // pointer to the node containing the loop test
genTreeOps lpTestOper() const; // the type of the comparison between the iterator and the limit (GT_LE, GT_GE,
// etc.)
void VERIFY_lpTestTree() const;

GenTree* lpTestTree; // pointer to the node containing the loop test
genTreeOps lpTestOper(); // the type of the comparison between the iterator and the limit (GT_LE, GT_GE, etc.)
void VERIFY_lpTestTree();
bool lpIsReversed() const; // true if the iterator node is the second operand in the loop condition
GenTree* lpIterator() const; // the iterator node in the loop test
GenTree* lpLimit() const; // the limit node in the loop test

bool lpIsReversed(); // true if the iterator node is the second operand in the loop condition
GenTree* lpIterator(); // the iterator node in the loop test
GenTree* lpLimit(); // the limit node in the loop test
// Limit constant value of iterator - loop condition is "i RELOP const"
// : Valid if LPFLG_CONST_LIMIT
int lpConstLimit() const;

int lpConstLimit(); // limit constant value of iterator - loop condition is "i RELOP const" : Valid if
// LPFLG_CONST_LIMIT
unsigned lpVarLimit(); // the lclVar # in the loop condition ( "i RELOP lclVar" ) : Valid if
// LPFLG_VAR_LIMIT
bool lpArrLenLimit(Compiler* comp, ArrIndex* index); // The array length in the loop condition ( "i RELOP
// arr.len" or "i RELOP arr[i][j].len" ) : Valid if
// LPFLG_ARRLEN_LIMIT
// The lclVar # in the loop condition ( "i RELOP lclVar" )
// : Valid if LPFLG_VAR_LIMIT
unsigned lpVarLimit() const;

// The array length in the loop condition ( "i RELOP arr.len" or "i RELOP arr[i][j].len" )
// : Valid if LPFLG_ARRLEN_LIMIT
bool lpArrLenLimit(Compiler* comp, ArrIndex* index) const;

// Returns "true" iff "*this" contains the blk.
bool lpContains(BasicBlock* blk)
bool lpContains(BasicBlock* blk) const
{
return lpFirst->bbNum <= blk->bbNum && blk->bbNum <= lpBottom->bbNum;
}
// Returns "true" iff "*this" (properly) contains the range [first, bottom] (allowing firsts
// to be equal, but requiring bottoms to be different.)
bool lpContains(BasicBlock* first, BasicBlock* bottom)
bool lpContains(BasicBlock* first, BasicBlock* bottom) const
{
return lpFirst->bbNum <= first->bbNum && bottom->bbNum < lpBottom->bbNum;
}

// Returns "true" iff "*this" (properly) contains "lp2" (allowing firsts to be equal, but requiring
// bottoms to be different.)
bool lpContains(const LoopDsc& lp2)
bool lpContains(const LoopDsc& lp2) const
{
return lpContains(lp2.lpFirst, lp2.lpBottom);
}

// Returns "true" iff "*this" is (properly) contained by the range [first, bottom]
// (allowing firsts to be equal, but requiring bottoms to be different.)
bool lpContainedBy(BasicBlock* first, BasicBlock* bottom)
bool lpContainedBy(BasicBlock* first, BasicBlock* bottom) const
{
return first->bbNum <= lpFirst->bbNum && lpBottom->bbNum < bottom->bbNum;
}

// Returns "true" iff "*this" is (properly) contained by "lp2"
// (allowing firsts to be equal, but requiring bottoms to be different.)
bool lpContainedBy(const LoopDsc& lp2)
bool lpContainedBy(const LoopDsc& lp2) const
{
return lpContains(lp2.lpFirst, lp2.lpBottom);
}

// Returns "true" iff "*this" is disjoint from the range [top, bottom].
bool lpDisjoint(BasicBlock* first, BasicBlock* bottom)
bool lpDisjoint(BasicBlock* first, BasicBlock* bottom) const
{
return bottom->bbNum < lpFirst->bbNum || lpBottom->bbNum < first->bbNum;
}
// Returns "true" iff "*this" is disjoint from "lp2".
bool lpDisjoint(const LoopDsc& lp2)
bool lpDisjoint(const LoopDsc& lp2) const
{
return lpDisjoint(lp2.lpFirst, lp2.lpBottom);
}
// Returns "true" iff the loop is well-formed (see code for defn).
bool lpWellFormed()
bool lpWellFormed() const
{
return lpFirst->bbNum <= lpTop->bbNum && lpTop->bbNum <= lpEntry->bbNum &&
lpEntry->bbNum <= lpBottom->bbNum &&
Expand Down Expand Up @@ -6436,9 +6431,9 @@ class Compiler
BasicBlock* lpBottom,
unsigned char lpExitCnt,
BasicBlock* lpExit,
unsigned parentLoop = BasicBlock::NOT_IN_LOOP);
void optPrintLoopInfo(unsigned lnum);
void optPrintLoopRecording(unsigned lnum);
unsigned parentLoop = BasicBlock::NOT_IN_LOOP) const;
void optPrintLoopInfo(unsigned lnum) const;
void optPrintLoopRecording(unsigned lnum) const;

void optCheckPreds();
#endif
Expand Down Expand Up @@ -6476,8 +6471,6 @@ class Compiler
// iff "l2" is not NOT_IN_LOOP, and "l1" contains "l2".
bool optLoopContains(unsigned l1, unsigned l2);

// Requires "loopInd" to be a valid index into the loop table.

// Updates the loop table by changing loop "loopInd", whose head is required
// to be "from", to be "to". Also performs this transformation for any
// loop nested in "loopInd" that shares the same head as "loopInd".
Expand All @@ -6489,10 +6482,13 @@ class Compiler

// Marks the containsCall information to "lnum" and any parent loops.
void AddContainsCallAllContainingLoops(unsigned lnum);

// Adds the variable liveness information from 'blk' to "lnum" and any parent loops.
void AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk);

// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
void AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd);

// Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
void AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemType);

Expand All @@ -6501,7 +6497,7 @@ class Compiler
void optCopyBlkDest(BasicBlock* from, BasicBlock* to);

// Returns true if 'block' is an entry block for any loop in 'optLoopTable'
bool optIsLoopEntry(BasicBlock* block);
bool optIsLoopEntry(BasicBlock* block) const;

// The depth of the loop described by "lnum" (an index into the loop table.) (0 == top level)
unsigned optLoopDepth(unsigned lnum)
Expand Down Expand Up @@ -7397,7 +7393,7 @@ class Compiler
void optObtainLoopCloningOpts(LoopCloneContext* context);
bool optIsLoopClonable(unsigned loopInd);

bool optCanCloneLoops();
bool optLoopCloningEnabled();

#ifdef DEBUG
void optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore);
Expand Down
30 changes: 15 additions & 15 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3388,7 +3388,7 @@ inline void Compiler::LoopDsc::AddModifiedElemType(Compiler* comp, CORINFO_CLASS
lpArrayElemTypesModified->Set(structHnd, true, ClassHandleSet::Overwrite);
}

inline void Compiler::LoopDsc::VERIFY_lpIterTree()
inline void Compiler::LoopDsc::VERIFY_lpIterTree() const
{
#ifdef DEBUG
assert(lpFlags & LPFLG_ITER);
Expand All @@ -3397,8 +3397,8 @@ inline void Compiler::LoopDsc::VERIFY_lpIterTree()

assert(lpIterTree->OperIs(GT_ASG));

GenTree* lhs = lpIterTree->AsOp()->gtOp1;
GenTree* rhs = lpIterTree->AsOp()->gtOp2;
const GenTree* lhs = lpIterTree->AsOp()->gtOp1;
const GenTree* rhs = lpIterTree->AsOp()->gtOp2;
assert(lhs->OperGet() == GT_LCL_VAR);

switch (rhs->gtOper)
Expand All @@ -3420,15 +3420,15 @@ inline void Compiler::LoopDsc::VERIFY_lpIterTree()

//-----------------------------------------------------------------------------

inline unsigned Compiler::LoopDsc::lpIterVar()
inline unsigned Compiler::LoopDsc::lpIterVar() const
{
VERIFY_lpIterTree();
return lpIterTree->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum();
}

//-----------------------------------------------------------------------------

inline int Compiler::LoopDsc::lpIterConst()
inline int Compiler::LoopDsc::lpIterConst() const
{
VERIFY_lpIterTree();
GenTree* rhs = lpIterTree->AsOp()->gtOp2;
Expand All @@ -3437,14 +3437,14 @@ inline int Compiler::LoopDsc::lpIterConst()

//-----------------------------------------------------------------------------

inline genTreeOps Compiler::LoopDsc::lpIterOper()
inline genTreeOps Compiler::LoopDsc::lpIterOper() const
{
VERIFY_lpIterTree();
GenTree* rhs = lpIterTree->AsOp()->gtOp2;
return rhs->OperGet();
}

inline var_types Compiler::LoopDsc::lpIterOperType()
inline var_types Compiler::LoopDsc::lpIterOperType() const
{
VERIFY_lpIterTree();

Expand All @@ -3459,7 +3459,7 @@ inline var_types Compiler::LoopDsc::lpIterOperType()
return type;
}

inline void Compiler::LoopDsc::VERIFY_lpTestTree()
inline void Compiler::LoopDsc::VERIFY_lpTestTree() const
{
#ifdef DEBUG
assert(lpFlags & LPFLG_ITER);
Expand Down Expand Up @@ -3505,7 +3505,7 @@ inline void Compiler::LoopDsc::VERIFY_lpTestTree()

//-----------------------------------------------------------------------------

inline bool Compiler::LoopDsc::lpIsReversed()
inline bool Compiler::LoopDsc::lpIsReversed() const
{
VERIFY_lpTestTree();
return ((lpTestTree->AsOp()->gtOp2->gtOper == GT_LCL_VAR) &&
Expand All @@ -3514,7 +3514,7 @@ inline bool Compiler::LoopDsc::lpIsReversed()

//-----------------------------------------------------------------------------

inline genTreeOps Compiler::LoopDsc::lpTestOper()
inline genTreeOps Compiler::LoopDsc::lpTestOper() const
{
VERIFY_lpTestTree();
genTreeOps op = lpTestTree->OperGet();
Expand All @@ -3523,7 +3523,7 @@ inline genTreeOps Compiler::LoopDsc::lpTestOper()

//-----------------------------------------------------------------------------

inline GenTree* Compiler::LoopDsc::lpIterator()
inline GenTree* Compiler::LoopDsc::lpIterator() const
{
VERIFY_lpTestTree();

Expand All @@ -3532,7 +3532,7 @@ inline GenTree* Compiler::LoopDsc::lpIterator()

//-----------------------------------------------------------------------------

inline GenTree* Compiler::LoopDsc::lpLimit()
inline GenTree* Compiler::LoopDsc::lpLimit() const
{
VERIFY_lpTestTree();

Expand All @@ -3541,7 +3541,7 @@ inline GenTree* Compiler::LoopDsc::lpLimit()

//-----------------------------------------------------------------------------

inline int Compiler::LoopDsc::lpConstLimit()
inline int Compiler::LoopDsc::lpConstLimit() const
{
VERIFY_lpTestTree();
assert(lpFlags & LPFLG_CONST_LIMIT);
Expand All @@ -3553,7 +3553,7 @@ inline int Compiler::LoopDsc::lpConstLimit()

//-----------------------------------------------------------------------------

inline unsigned Compiler::LoopDsc::lpVarLimit()
inline unsigned Compiler::LoopDsc::lpVarLimit() const
{
VERIFY_lpTestTree();
assert(lpFlags & LPFLG_VAR_LIMIT);
Expand All @@ -3565,7 +3565,7 @@ inline unsigned Compiler::LoopDsc::lpVarLimit()

//-----------------------------------------------------------------------------

inline bool Compiler::LoopDsc::lpArrLenLimit(Compiler* comp, ArrIndex* index)
inline bool Compiler::LoopDsc::lpArrLenLimit(Compiler* comp, ArrIndex* index) const
{
VERIFY_lpTestTree();
assert(lpFlags & LPFLG_ARRLEN_LIMIT);
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4219,12 +4219,12 @@ BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst)
flowList* newEdge = fgGetPredForBlock(jmpBlk, bSrc);

jmpBlk->bbWeight = (newEdge->edgeWeightMin() + newEdge->edgeWeightMax()) / 2;
if (bSrc->bbWeight == 0)
if (bSrc->bbWeight == BB_ZERO_WEIGHT)
{
jmpBlk->bbWeight = 0;
jmpBlk->bbWeight = BB_ZERO_WEIGHT;
}

if (jmpBlk->bbWeight == 0)
if (jmpBlk->bbWeight == BB_ZERO_WEIGHT)
{
jmpBlk->bbFlags |= BBF_RUN_RARELY;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
{
if (createDotFile)
{
fprintf(fgxFile, " BB%02u [label = \"BB%02u\\n\\n", block->bbNum, block->bbNum);
fprintf(fgxFile, " " FMT_BB " [label = \"" FMT_BB "\\n\\n", block->bbNum, block->bbNum);

// "Raw" Profile weight
if (block->hasProfileWeight())
Expand Down
Loading

0 comments on commit cb88894

Please sign in to comment.