diff --git a/dotnet.sh b/dotnet.sh index a612ebac1ce1e4..54cc9bc8e7146f 100755 --- a/dotnet.sh +++ b/dotnet.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash - +set -x source="${BASH_SOURCE[0]}" # resolve $SOURCE until the file is no longer a symlink while [[ -h $source ]]; do diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a04bd56057abb0..dac2f126f1cc51 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4746,6 +4746,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals); + // Tail duplication + DoPhase(this, PHASE_TAIL_DUP, &Compiler::fgTailDuplicate); + // Do an early pass of liveness for forward sub and morph. This data is // valid until after morph. // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 645e54258a637a..44aba9f2596d5a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5521,6 +5521,10 @@ class Compiler PhaseStatus fgTailMerge(); + PhaseStatus fgTailDuplicate(); + bool fgTailDuplicateBlock(BasicBlock* const block); + bool fgLocalIsConstantOut(GenTreeLclVarCommon* lcl, BasicBlock* const block, unsigned limit = 10); + enum FG_RELOCATE_TYPE { FG_RELOCATE_TRY, // relocate the 'try' region diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 9fe31ca846410e..90901d3ecfbde6 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -56,6 +56,7 @@ CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1, f CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", false, -1, false) #endif // FEATURE_EH_FUNCLETS CompPhaseNameMacro(PHASE_TAIL_MERGE, "Tail merge", false, -1, false) +CompPhaseNameMacro(PHASE_TAIL_DUP, "Tail duplicate", false, -1, false) CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", false, -1, false) CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", false, -1, false) CompPhaseNameMacro(PHASE_TAIL_MERGE2, "Post-morph tail merge", false, -1, false) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 480739ea693376..cb68182dec8422 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -7039,3 +7039,337 @@ PhaseStatus Compiler::fgTailMerge() return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } + +//------------------------------------------------------------------------ +// fgTailDuplicate: duplicate sequences of statements in block predecessors +// +// Returns: +// Suitable phase status. +// +// Notes: +// Looks for cases where moving statements from a block to its predecessor +// is likely to result in greatly simplified computations. +// +// Similar in spirit to fgOptimizeUncondBranchToSimpleCond but more general +// as it considers a wider set of computations (not just relops feeding branches). +// +// Ideally we'd do this with SSA available, but our inability to handle +// side effects and to update SSA makes that too challenging. +// +PhaseStatus Compiler::fgTailDuplicate() +{ + if (!opts.OptimizationEnabled()) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + const bool isEnabled = JitConfig.JitEnableTailDuplication() > 0; + if (!isEnabled) + { + JITDUMP("Tail duplication disabled by JitEnableTailDuplication\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + // Must be run when local threading is enabled... + // + assert(fgNodeThreading == NodeThreading::AllLocals); + + bool madeChanges = false; + + // Visit each block + // + for (BasicBlock* const block : Blocks()) + { + madeChanges |= fgTailDuplicateBlock(block); + } + + return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} + +bool Compiler::fgTailDuplicateBlock(BasicBlock* const block) +{ + // We only consider blocks with muliple preds without + // any critical edges. + // + // We could handle critical edges by splitting. + // + unsigned numPreds = 0; + + for (BasicBlock* const predBlock : block->PredBlocks()) + { + if (predBlock->GetUniqueSucc() != block) + { + // critical edge + return false; + } + + if (predBlock->isBBCallAlwaysPairTail()) + { + // this block must remain empty + return false; + } + + numPreds++; + } + + if (numPreds < 2) + { + return false; + } + + if (block->bbJumpKind == BBJ_RETURN) + { + if (!compMethodHasRetVal()) + { + return false; + } + + if (compMethodReturnsRetBufAddr()) + { + return false; + } + + if (genActualType(info.compRetType) == TYP_STRUCT) + { + return false; + } + } + + JITDUMP("\nConsidering tail dup in " FMT_BB ": has %u preds\n", block->bbNum, numPreds); + + bool madeChanges = false; + + // Walk statements from first. + // + Statement* const lastStmt = block->lastStmt(); + for (Statement* stmt : block->Statements()) + { + JITDUMP("\nScanning " FMT_STMT "\n", stmt->GetID()); + unsigned numLocal = 0; + unsigned numAllConstant = 0; + unsigned numSomeConstant = 0; + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + if (lcl->OperIsLocalStore()) + { + continue; + } + + JITDUMP("... checking [%06u] V%02u\n", dspTreeID(lcl), lcl->GetLclNum()); + numLocal++; + unsigned numConstantThisLocal = 0; + for (BasicBlock* const predBlock : block->PredBlocks()) + { + // todo: possibly memoize this if we have repeated uses + // of the same local in this or across statements. + // + if (fgLocalIsConstantOut(lcl, predBlock)) + { + numConstantThisLocal++; + } + } + + if (numConstantThisLocal == numPreds) + { + numAllConstant++; + } + else if (numConstantThisLocal > 0) + { + numSomeConstant++; + } + } + + if (numLocal == 0) + { + // Todo: consider allowing reordering stmts... + break; + } + + bool tailDuplicate = false; + + if (numAllConstant == numLocal) + { + // todo: cost checks... (heuristic) + JITDUMP("All %u locals are constant in all preds, duplicating " FMT_STMT "\n", numLocal, stmt->GetID()); + tailDuplicate = true; + } + else if (numAllConstant > 0) + { + // Could handle this case with a more stringent cost check + JITDUMP(FMT_STMT " %u of the %u locals are constant in all preds\n", stmt->GetID(), numAllConstant, + numLocal); + } + else if (numSomeConstant > 0) + { + // Could handle this case with an even more stringent cost check + JITDUMP(FMT_STMT " %u of the %u locals are constant in some preds\n", stmt->GetID(), numSomeConstant, + numLocal); + } + + // leave last statement alone for branches and such. + // + if ((stmt == lastStmt) && !(block->KindIs(BBJ_ALWAYS, BBJ_NONE, BBJ_RETURN))) + { + // if there are constants here we could split the computation off and hoist it like + // we do for returns... not clear if this is better or not. + // + JITDUMP(FMT_STMT " has control flow impact\n", stmt->GetID()); + tailDuplicate = false; + } + + if (!tailDuplicate) + { + // We didn't duplicate this statement, so we can't duplicate any of the ones that follow. + break; + } + + if (tailDuplicate) + { + JITDUMP(" ...duplicating " FMT_STMT "\n", numLocal, stmt->GetID()); + // If stmt is GT_RETURN, we need to spill to a new temp, and then dup the spill. + // + bool deleteStmt = true; + + if (stmt->GetRootNode()->OperIs(GT_RETURN)) + { + assert(compMethodHasRetVal()); + GenTree* const retNode = stmt->GetRootNode(); + GenTree* const retVal = retNode->AsOp()->gtOp1; + unsigned const retLclNum = lvaGrabTemp(true DEBUGARG("Tail dup return value temp")); + var_types const retLclType = info.compRetType; + + lvaGetDesc(retLclNum)->lvType = retLclType; + GenTree* const retTemp = gtNewLclvNode(retLclNum, retLclType); + retTemp->gtFlags |= GTF_DONT_CSE; + retNode->AsOp()->gtOp1 = retTemp; + fgSequenceLocals(stmt); + gtUpdateStmtSideEffects(stmt); + + GenTree* const retStore = gtNewTempStore(retLclNum, retVal); + stmt = gtNewStmt(retStore); + fgSequenceLocals(stmt); + deleteStmt = false; + } + + for (BasicBlock* const predBlock : block->PredBlocks()) + { + GenTree* const clone = gtCloneExpr(stmt->GetRootNode()); + noway_assert(clone); + Statement* const cloneStmt = gtNewStmt(clone); + fgInsertStmtAtEnd(predBlock, cloneStmt); + fgSequenceLocals(cloneStmt); + madeChanges = true; + + for (GenTreeLclVarCommon* clonedLcl : cloneStmt->LocalsTreeList()) + { + LclVarDsc* const clonedLclVarDsc = lvaGetDesc(clonedLcl); + clonedLclVarDsc->incLvRefCntSaturating(1, RCS_EARLY); + } + } + + if (deleteStmt) + { + for (GenTreeLclVarCommon* origLcl : stmt->LocalsTreeList()) + { + LclVarDsc* const origLclVarDsc = lvaGetDesc(origLcl); + unsigned short refCnt = origLclVarDsc->lvRefCnt(RCS_EARLY); + assert(refCnt > 0); + origLclVarDsc->setLvRefCnt(refCnt - 1, RCS_EARLY); + } + + fgRemoveStmt(block, stmt); + } + else + { + // Next statement is now the modified return, leave it as is. + // + break; + } + } + + if (stmt == lastStmt) + { + break; + } + } + + return madeChanges; +} + +// very similar to fgBlockEndFavorsTailDuplication +bool Compiler::fgLocalIsConstantOut(GenTreeLclVarCommon* lcl, BasicBlock* const block, unsigned limit) +{ + Statement* const lastStmt = block->lastStmt(); + Statement* const firstStmt = block->FirstNonPhiDef(); + + // Check up to limit statements... + // + unsigned count = 0; + + if (lastStmt != nullptr) + { + Statement* stmt = lastStmt; + + while (count < limit) + { + count++; + + if ((stmt->GetRootNode()->gtFlags & GTF_ASG) != 0) + { + JITDUMP("Checking " FMT_STMT " for constant def of V%02u\n", stmt->GetID(), lcl->GetLclNum()); + + for (GenTreeLclVarCommon* tree : stmt->LocalsTreeList()) + { + if (tree->OperIsLocalStore() && !tree->OperIsBlkOp() && (tree->GetLclNum() == lcl->GetLclNum())) + { + GenTree* const data = tree->Data(); + if (data->OperIsConst() || data->OperIs(GT_LCL_ADDR)) + { + JITDUMP("[%06u] constant\n", dspTreeID(data)); + return true; + } + else if (data->OperIsLocal()) + { + lcl = data->AsLclVarCommon(); + JITDUMP("Chaining to new local V%02u\n", lcl->GetLclNum()); + } + else + { + JITDUMP("[%06u] not constant or local\n", dspTreeID(data)); + return false; + } + } + } + } + + Statement* const prevStmt = stmt->GetPrevStmt(); + + // The statement list prev links wrap from first->last, so exit + // when we see lastStmt again, as we've now seen all statements. + // + if (prevStmt == lastStmt) + { + break; + } + + stmt = prevStmt; + } + + if (count == limit) + { + return false; + } + } + + BasicBlock* const pred = block->GetUniquePred(this); + + // Walk back in flow + // + if (pred != nullptr) + { + JITDUMP("Chaining back to " FMT_BB " remaining limit %u\n", pred->bbNum, limit - count); + return fgLocalIsConstantOut(lcl, pred, limit - count); + } + + return false; +} diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b3384031f355eb..5222d51edc0448 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -624,6 +624,9 @@ CONFIG_INTEGER(JitCFGUseDispatcher, W("JitCFGUseDispatcher"), 2) // Enable tail merging CONFIG_INTEGER(JitEnableTailMerge, W("JitEnableTailMerge"), 1) +// Enable tail duplication +CONFIG_INTEGER(JitEnableTailDuplication, W("JitEnableTailDuplication"), 1) + // Enable physical promotion CONFIG_INTEGER(JitEnablePhysicalPromotion, W("JitEnablePhysicalPromotion"), 1)