Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Allow sharing call temps within statements #79574

Merged
merged 3 commits into from
Jan 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1877,6 +1877,7 @@ class Compiler
friend struct GenTree;
friend class MorphInitBlockHelper;
friend class MorphCopyBlockHelper;
friend class SharedTempsScope;
friend class CallArgs;
friend class IndirectCallTransformer;

Expand Down Expand Up @@ -5638,8 +5639,8 @@ class Compiler
bool compCanEncodePtrArgCntMax();

private:
hashBv* fgOutgoingArgTemps;
hashBv* fgCurrentlyInUseArgTemps;
hashBv* fgAvailableOutgoingArgTemps;
ArrayStack<unsigned>* fgUsedSharedTemps;

void fgSetRngChkTarget(GenTree* tree, bool delay = true);

Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ void Compiler::fgInit()
/* This is set by fgComputeReachability */
fgEnterBlks = BlockSetOps::UninitVal();

fgUsedSharedTemps = nullptr;

#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
fgAlwaysBlks = BlockSetOps::UninitVal();
#endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
Expand Down
19 changes: 8 additions & 11 deletions src/coreclr/jit/hashbv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,36 +611,32 @@ void hashBv::Resize(int newSize)
#ifdef DEBUG
void hashBv::dump()
{
bool first = true;
indexType index;
bool first = true;

// uncomment to print internal implementation details
// DBEXEC(TRUE, printf("[%d(%d)(nodes:%d)]{ ", hashtable_size(), countBits(), this->numNodes));

printf("{");
FOREACH_HBV_BIT_SET(index, this)
{
ForEachHbvBitSet(*this, [&](indexType index) {
if (!first)
{
printf(" ");
}
printf("%d", index);
first = false;
}
NEXT_HBV_BIT_SET;
return HbvWalk::Continue;
});
printf("}\n");
}

void hashBv::dumpFancy()
{
indexType index;
indexType last_1 = -1;
indexType last_0 = -1;

printf("{");
printf("count:%d", this->countBits());
FOREACH_HBV_BIT_SET(index, this)
{
ForEachHbvBitSet(*this, [&](indexType index) {
if (last_1 != index - 1)
{
if (last_0 + 1 != last_1)
Expand All @@ -654,8 +650,9 @@ void hashBv::dumpFancy()
last_0 = index - 1;
}
last_1 = index;
}
NEXT_HBV_BIT_SET;

return HbvWalk::Continue;
});

// Print the last one
if (last_0 + 1 != last_1)
Expand Down
70 changes: 42 additions & 28 deletions src/coreclr/jit/hashbv.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ class hashBv
void dump();
void dumpFancy();
#endif // DEBUG
__forceinline int hashtable_size()
__forceinline int hashtable_size() const
{
return 1 << this->log2_hashSize;
}
Expand Down Expand Up @@ -306,35 +306,49 @@ class hashBvGlobalData
hashBv* hbvFreeList;
};

// clang-format off
#define FOREACH_HBV_BIT_SET(index, bv) \
{ \
for (int hashNum=0; hashNum<(bv)->hashtable_size(); hashNum++) {\
hashBvNode *node = (bv)->nodeArr[hashNum];\
while (node) { \
indexType base = node->baseIndex; \
for (int el=0; el<node->numElements(); el++) {\
elemType _i = 0; \
elemType _e = node->elements[el]; \
while (_e) { \
int _result = BitScanForwardPtr((DWORD *) &_i, _e); \
assert(_result); \
(index) = base + (el*BITS_PER_ELEMENT) + _i; \
_e ^= (elemType(1) << _i);

#define NEXT_HBV_BIT_SET \
}\
}\
node = node->next; \
}\
}\
} \
//clang-format on
enum class HbvWalk
{
Continue,
Abort,
};

template <typename TFunctor>
HbvWalk ForEachHbvBitSet(const hashBv& bv, TFunctor func)
{
for (int hashNum = 0; hashNum < bv.hashtable_size(); hashNum++)
{
hashBvNode* node = bv.nodeArr[hashNum];
while (node)
{
indexType base = node->baseIndex;
for (int el = 0; el < node->numElements(); el++)
{
elemType i = 0;
elemType e = node->elements[el];
while (e)
{
int result = BitScanForwardPtr((DWORD*)&i, e);
assert(result);
indexType index = base + (el * BITS_PER_ELEMENT) + i;
e ^= (elemType(1) << i);

if (func(index) == HbvWalk::Abort)
{
return HbvWalk::Abort;
}
}
}
node = node->next;
}
}

return HbvWalk::Continue;
}

#ifdef DEBUG
void SimpleDumpNode(hashBvNode *n);
void DumpNode(hashBvNode *n);
void SimpleDumpDualNode(hashBv *a, hashBv *b, hashBvNode *n, hashBvNode *m);
void SimpleDumpNode(hashBvNode* n);
void DumpNode(hashBvNode* n);
void SimpleDumpDualNode(hashBv* a, hashBv* b, hashBvNode* n, hashBvNode* m);
#endif // DEBUG

#endif
84 changes: 51 additions & 33 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ PhaseStatus Compiler::fgMorphInit()
// Initialize the BlockSet epoch
NewBasicBlockEpoch();

fgOutgoingArgTemps = nullptr;
fgAvailableOutgoingArgTemps = hashBv::Create(this);

// Insert call to class constructor as the first basic block if
// we were asked to do so.
Expand Down Expand Up @@ -147,11 +147,31 @@ GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* ope
return result;
}

/*****************************************************************************
*
* Convert the given node into a call to the specified helper passing
* the given argument list.
*/
class SharedTempsScope
{
Compiler* m_comp;
ArrayStack<unsigned> m_usedSharedTemps;
ArrayStack<unsigned>* m_prevUsedSharedTemps;

public:
SharedTempsScope(Compiler* comp)
: m_comp(comp)
, m_usedSharedTemps(comp->getAllocator(CMK_CallArgs))
, m_prevUsedSharedTemps(comp->fgUsedSharedTemps)
{
comp->fgUsedSharedTemps = &m_usedSharedTemps;
}

~SharedTempsScope()
{
m_comp->fgUsedSharedTemps = m_prevUsedSharedTemps;

for (int i = 0; i < m_usedSharedTemps.Height(); i++)
{
m_comp->fgAvailableOutgoingArgTemps->setBit((indexType)m_usedSharedTemps.Top(i));
}
}
};

//------------------------------------------------------------------------
// fgMorphIntoHelperCall:
Expand Down Expand Up @@ -236,6 +256,7 @@ GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, bool morphAr

if (morphArgs)
{
SharedTempsScope scope(this);
tree = fgMorphArgs(call);
}

Expand Down Expand Up @@ -3981,11 +4002,6 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)

JITDUMP("making an outgoing copy for struct arg\n");

if (fgOutgoingArgTemps == nullptr)
{
fgOutgoingArgTemps = hashBv::Create(this);
}

CORINFO_CLASS_HANDLE copyBlkClass = arg->GetSignatureClassHandle();
unsigned tmp = 0;
bool found = false;
Expand All @@ -3994,19 +4010,18 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
// We do not reuse within a statement.
if (!opts.MinOpts())
{
indexType lclNum;
FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
{
LclVarDsc* varDsc = lvaGetDesc((unsigned)lclNum);
if ((varDsc->GetStructHnd() == copyBlkClass) && !fgCurrentlyInUseArgTemps->testBit(lclNum))
{
tmp = (unsigned)lclNum;
found = true;
JITDUMP("reusing outgoing struct arg\n");
break;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FOREACH_HBV_BIT_SET expands into 4 nested loops, so this break is not doing what is expected here.

I have removed the macros and turned it into a callback based function instead to make it harder to make the same mistake in the future.

I first tried turning it into an iterator but it was quite complex and hard to follow when the loops are flattened, so I went with the callback approach instead.

}
}
NEXT_HBV_BIT_SET;
found = ForEachHbvBitSet(*fgAvailableOutgoingArgTemps, [&](indexType lclNum) {
LclVarDsc* varDsc = lvaGetDesc((unsigned)lclNum);
if ((varDsc->GetStructHnd() == copyBlkClass))
{
tmp = (unsigned)lclNum;
JITDUMP("reusing outgoing struct arg V%02u\n", tmp);
fgAvailableOutgoingArgTemps->clearBit(lclNum);
return HbvWalk::Abort;
}

return HbvWalk::Continue;
}) == HbvWalk::Abort;
}

// Create the CopyBlk tree and insert it.
Expand All @@ -4020,11 +4035,16 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
{
lvaSetStructUsedAsVarArg(tmp);
}

fgOutgoingArgTemps->setBit(tmp);
}

fgCurrentlyInUseArgTemps->setBit(tmp);
if (fgUsedSharedTemps != nullptr)
{
fgUsedSharedTemps->Push(tmp);
}
else
{
assert(!fgGlobalMorph);
}

// Copy the valuetype to the temp
GenTree* dest = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet());
Expand Down Expand Up @@ -8052,6 +8072,10 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)

compCurBB->bbFlags |= BBF_HAS_CALL; // This block has a call

// From this point on disallow shared temps to be reused until we are done
// processing the call.
SharedTempsScope sharedTemps(this);

// Process the "normal" argument list
call = fgMorphArgs(call);
noway_assert(call->gtOper == GT_CALL);
Expand Down Expand Up @@ -13755,8 +13779,6 @@ void Compiler::fgMorphStmts(BasicBlock* block)
{
fgRemoveRestOfBlock = false;

fgCurrentlyInUseArgTemps = hashBv::Create(this);

for (Statement* const stmt : block->Statements())
{
if (fgRemoveRestOfBlock)
Expand Down Expand Up @@ -13784,10 +13806,6 @@ void Compiler::fgMorphStmts(BasicBlock* block)

GenTree* morphedTree = fgMorphTree(oldTree);

// mark any outgoing arg temps as free so we can reuse them in the next statement.

fgCurrentlyInUseArgTemps->ZeroAll();

// Has fgMorphStmt been sneakily changed ?

if ((stmt->GetRootNode() != oldTree) || (block != compCurBB))
Expand Down