From 2e96b632dc313e2421b04b9500f1d453aa0757c3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 12 Aug 2023 00:28:48 +0200 Subject: [PATCH] JIT: Add a head merging pass Add a pass that does head merging to compliment the existing tail merging pass. Unlike tail merging this requires reordering the first statement with the terminator node of the predecessor, which requires some interference checking. Fix #90017 --- src/coreclr/jit/block.h | 5 + src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/fgopt.cpp | 260 ++++++++++++++++++++++++++++++++++- src/coreclr/jit/fgstmt.cpp | 2 +- 5 files changed, 269 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index cbdcd66af8590..21b37f17fc456 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -722,6 +722,11 @@ struct BasicBlock : private LIR::Range return KindIs(kind) || KindIs(rest...); } + bool HasTerminator() + { + return KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET, BBJ_COND, BBJ_SWITCH, BBJ_RETURN); + } + // NumSucc() gives the number of successors, and GetSucc() returns a given numbered successor. // // There are two versions of these functions: ones that take a Compiler* and ones that don't. You must diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a04bd56057abb..44975eae92469 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4718,7 +4718,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl { // Tail merge // - DoPhase(this, PHASE_TAIL_MERGE, &Compiler::fgTailMerge); + DoPhase(this, PHASE_TAIL_MERGE, [this]() { return fgTailMerge(true); }); // Merge common throw blocks // @@ -4835,7 +4835,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Second pass of tail merge // - DoPhase(this, PHASE_TAIL_MERGE2, &Compiler::fgTailMerge); + DoPhase(this, PHASE_TAIL_MERGE2, [this]() { return fgTailMerge(false); }); // Compute reachability sets and dominators. // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3214a8a581c82..0a3a67e2d140d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5519,7 +5519,8 @@ class Compiler void fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk); - PhaseStatus fgTailMerge(); + PhaseStatus fgTailMerge(bool early); + bool fgCanMoveFirstStatementIntoPred(bool early, Statement* firstStmt, BasicBlock* pred); enum FG_RELOCATE_TYPE { diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 480739ea69337..947093d1f60c5 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -6657,6 +6657,121 @@ void Compiler::fgCompDominatedByExceptionalEntryBlocks() } } +bool Compiler::fgCanMoveFirstStatementIntoPred(bool early, Statement* firstStmt, BasicBlock* pred) +{ + if (!pred->HasTerminator()) + { + return true; + } + + GenTree* tree1 = pred->lastStmt()->GetRootNode(); + GenTree* tree2 = firstStmt->GetRootNode(); + + GenTreeFlags tree1Flags = tree1->gtFlags; + GenTreeFlags tree2Flags = tree2->gtFlags; + + if (early) + { + tree1Flags |= gtHasLocalsWithAddrOp(tree1) ? GTF_GLOB_REF : GTF_EMPTY; + tree2Flags |= gtHasLocalsWithAddrOp(tree2) ? GTF_GLOB_REF : GTF_EMPTY; + } + + // We do not support embedded statements in the terminator node. + if ((tree1Flags & GTF_ASG) != 0) + { + JITDUMP(" no; terminator contains embedded store\n"); + return false; + } + if ((tree2Flags & GTF_ASG) != 0) + { + // Handle common case where the second statement is a top-level store. + if (!tree2->OperIsLocalStore()) + { + JITDUMP(" cannot reorder with GTF_ASG without top-level store"); + return false; + } + + GenTreeLclVarCommon* lcl = tree2->AsLclVarCommon(); + if ((lcl->Data()->gtFlags & GTF_ASG) != 0) + { + JITDUMP(" cannot reorder with embedded store"); + return false; + } + + LclVarDsc* dsc = lvaGetDesc(tree2->AsLclVarCommon()); + if ((tree1Flags & GTF_ALL_EFFECT) != 0) + { + if (early ? dsc->lvHasLdAddrOp : dsc->IsAddressExposed()) + { + JITDUMP(" cannot reorder store to exposed local with any side effect\n"); + return false; + } + } + + if (gtHasRef(tree1, lcl->GetLclNum())) + { + JITDUMP(" cannot reorder with interfering use\n"); + return false; + } + + if (dsc->lvIsStructField && gtHasRef(tree1, dsc->lvParentLcl)) + { + JITDUMP(" cannot reorder with interferring use of parent struct local\n"); + return false; + } + + if (dsc->lvPromoted) + { + for (int i = 0; i < dsc->lvFieldCnt; i++) + { + if (gtHasRef(tree1, dsc->lvFieldLclStart + i)) + { + JITDUMP(" cannot reorder with interferring use of struct field\n"); + return false; + } + } + } + + // We've validated that the store does not interfere. Get rid of the + // flag for the future checks. + tree2Flags &= ~GTF_ASG; + } + + if (((tree1Flags & GTF_CALL) != 0) && ((tree2Flags & GTF_ALL_EFFECT) != 0)) + { + JITDUMP(" cannot reorder call with any side effect\n"); + return false; + } + if (((tree1Flags & GTF_GLOB_REF) != 0) && ((tree2Flags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)) + { + JITDUMP(" cannot reorder global reference with persistent side effects\n"); + return false; + } + if ((tree1Flags & GTF_ORDER_SIDEEFF) != 0) + { + if ((tree2Flags & (GTF_GLOB_REF | GTF_ORDER_SIDEEFF)) != 0) + { + JITDUMP(" cannot reorder ordering side effect\n"); + return false; + } + } + if ((tree2Flags & GTF_ORDER_SIDEEFF) != 0) + { + if ((tree1Flags & (GTF_GLOB_REF | GTF_ORDER_SIDEEFF)) != 0) + { + JITDUMP(" cannot reorder ordering side effect\n"); + return false; + } + } + if (((tree1Flags & GTF_EXCEPT) != 0) && ((tree2Flags & GTF_SIDE_EFFECT) != 0)) + { + JITDUMP(" cannot reorder exception with side effect\n"); + return false; + } + + return true; +} + //------------------------------------------------------------------------ // fgTailMerge: merge common sequences of statements in block predecessors // @@ -6680,7 +6795,7 @@ void Compiler::fgCompDominatedByExceptionalEntryBlocks() // incurring too much TP overhead. It's possible to make the merging // more efficient and if so it might be worth revising this value. // -PhaseStatus Compiler::fgTailMerge() +PhaseStatus Compiler::fgTailMerge(bool early) { bool madeChanges = false; int const mergeLimit = 50; @@ -7031,6 +7146,149 @@ PhaseStatus Compiler::fgTailMerge() iterateTailMerge(retryBlocks.Pop()); } + // Try head merging a block. + // If return value is true, retry. + // May also add to retryBlocks. + // + auto headMerge = [&](BasicBlock* block) -> bool { + + if (block->NumSucc(this) < 2) + { + // Nothing to merge here + return false; + } + + matchedPredInfo.Reset(); + + // Find the subset of preds that reach along non-critical edges + // and populate predInfo. + // + for (BasicBlock* const succBlock : block->Succs(this)) + { + if (succBlock->GetUniquePred(this) != block) + { + return false; + } + + if (!BasicBlock::sameEHRegion(block, succBlock)) + { + return false; + } + + Statement* firstStmt = nullptr; + // Walk past any GT_NOPs. + // + for (Statement* stmt : succBlock->Statements()) + { + if (stmt->GetRootNode()->OperIs(GT_NOP)) + { + continue; + } + + firstStmt = stmt; + break; + } + + // Block might be effectively empty. + // + if (firstStmt == nullptr) + { + return false; + } + + // Cannot move terminator statement. + // + if ((firstStmt == succBlock->lastStmt()) && succBlock->HasTerminator()) + { + return false; + } + + if (firstStmt->GetRootNode()->IsCall() && firstStmt->GetRootNode()->AsCall()->CanTailCall()) + { + return false; + } + + if ((matchedPredInfo.Height() == 0) || + GenTree::Compare(firstStmt->GetRootNode(), matchedPredInfo.BottomRef(0).m_stmt->GetRootNode())) + { + matchedPredInfo.Emplace(succBlock, firstStmt); + } + else + { + return false; + } + } + + Statement* firstStmt = matchedPredInfo.TopRef(0).m_stmt; + JITDUMP("All succs of " FMT_BB " start with the same tree\n", block->bbNum); + DISPSTMT(firstStmt); + + JITDUMP("Checking if we can move it into the predecessor...\n"); + + if (!fgCanMoveFirstStatementIntoPred(early, firstStmt, block)) + { + return false; + } + + JITDUMP("Moving statement\n"); + + for (int i = 0; i < matchedPredInfo.Height(); i++) + { + PredInfo& info = matchedPredInfo.TopRef(i); + Statement* const stmt = info.m_stmt; + BasicBlock* const succBlock = info.m_block; + + fgUnlinkStmt(succBlock, stmt); + + // Add one of the matching stmts to block, and + // update its flags. + // + if (i == 0) + { + fgInsertStmtNearEnd(block, stmt); + block->bbFlags |= succBlock->bbFlags & BBF_COPY_PROPAGATE; + } + + madeChanges = true; + } + + return true; + }; + + auto iterateHeadMerge = [&](BasicBlock* block) -> void { + + int numOpts = 0; + bool retry = true; + + while (retry) + { + retry = headMerge(block); + if (retry) + { + numOpts++; + } + } + + if (numOpts > 0) + { + JITDUMP("Did %d head merges in " FMT_BB "\n", numOpts, block->bbNum); + } + }; + + // Visit each block + // + for (BasicBlock* const block : Blocks()) + { + iterateHeadMerge(block); + } + + // Work through any retries + // + while (retryBlocks.Height() > 0) + { + iterateHeadMerge(retryBlocks.Pop()); + } + // If we altered flow, reset fgModified. Given where we sit in the // phase list, flow-dependent side data hasn't been built yet, so // nothing needs invalidation. diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp index 4651acbdfb247..fead5b82e0b34 100644 --- a/src/coreclr/jit/fgstmt.cpp +++ b/src/coreclr/jit/fgstmt.cpp @@ -182,7 +182,7 @@ void Compiler::fgInsertStmtNearEnd(BasicBlock* block, Statement* stmt) // This routine can only be used when in tree order. assert(fgOrder == FGOrderTree); - if (block->KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET, BBJ_COND, BBJ_SWITCH, BBJ_RETURN)) + if (block->HasTerminator()) { Statement* firstStmt = block->firstStmt(); noway_assert(firstStmt != nullptr);