Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to instrument using 64-bit counts #51625

Merged
merged 8 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5526,9 +5526,15 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti
case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount:
printf("B %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset));
break;
case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount:
printf("B %llu", *(uint64_t*)(pInstrumentationData + pBuf[i].Offset));
break;
case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount:
printf("E %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset));
break;
case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount:
printf("E %llu", *(uint64_t*)(pInstrumentationData + pBuf[i].Offset));
break;
case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount:
printf("T %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset));
break;
Expand Down Expand Up @@ -6704,28 +6710,40 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno

size_t minOffset = (size_t) ~0;
size_t maxOffset = 0;
uint32_t totalCount = 0;
uint64_t totalCount = 0;

if (SUCCEEDED(pgoHR))
{
// Locate the range of the counter data.
// Locate the range of the data.
//
for (UINT32 i = 0; i < schemaCount; i++)
{
if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount)
|| (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount))
size_t start = schema[i].Offset;
size_t end;
switch (schema[i].InstrumentationKind)
{
if (schema[i].Offset < minOffset)
{
minOffset = schema[i].Offset;
}
case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount:
case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount:
totalCount += *(uint32_t*)(schemaData + schema[i].Offset);
end = start + 4;
break;
case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount:
case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount:
totalCount += *(uint64_t*)(schemaData + schema[i].Offset);
end = start + 8;
break;
default:
continue;
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have done this so far, but shouldn't we ideally also be taking other PGO data into account here? @AndyAyersMS

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps? I left these out on purpose, though we might reconsider.

The class profile data ends up interspersed with the count data, so the hash below will typically incorporate those values; the schema records themselves don't contain that much interesting content.


if (schema[i].Offset > maxOffset)
{
maxOffset = schema[i].Offset;
}
if (start < minOffset)
{
minOffset = start;
}

totalCount += *(uint32_t*)(schemaData + schema[i].Offset);
if (end > maxOffset)
{
maxOffset = end;
}
}

Expand All @@ -6734,10 +6752,10 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno
if (minOffset < maxOffset)
{
char pgoHash[MD5_HASH_BUFFER_SIZE];
dumpMD5HashToBuffer(schemaData + minOffset, (int)(maxOffset + sizeof(int) - minOffset), pgoHash,
dumpMD5HashToBuffer(schemaData + minOffset, (int)(maxOffset - minOffset), pgoHash,
MD5_HASH_BUFFER_SIZE);

t = sprintf_s(buff, len, " Pgo Counters %u, Count %u, Hash: %s", schemaCount, totalCount, pgoHash);
t = sprintf_s(buff, len, " Pgo Counters %u, Count %llu, Hash: %s", schemaCount, totalCount, pgoHash);
buff += t;
len -= t;
}
Expand Down Expand Up @@ -6780,6 +6798,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool
{
hasEdgeProfile = false;
hasClassProfile = false;
hasLikelyClass = false;

// Obtain the Method Info structure for this method
CORINFO_METHOD_INFO info;
Expand All @@ -6799,6 +6818,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool
for (UINT32 i = 0; i < schemaCount; i++)
{
hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount);
hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount);
hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount);
hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass);

Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/inc/corjit.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,14 @@ class ICorJitInfo : public ICorDynamicInfo
DescriptorMin = 0x40,

Done = None, // All instrumentation schemas must end with a record which is "Done"
BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // 4 byte basic block counter, using unsigned 4 byte int
BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int
BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int
TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram
TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram
Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema
NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema
EdgeIntCount = (DescriptorMin * 6) | FourByte, // 4 byte edge counter, using unsigned 4 byte int
EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int
EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int
GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data
};

Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7905,13 +7905,13 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)

/*****************************************************************************
*
* Display an reloc value
* If we are formatting for an assembly listing don't print the hex value
* Display a reloc value
* If we are formatting for a diffable assembly listing don't print the hex value
* since it will prevent us from doing assembly diffs
*/
void emitter::emitDispReloc(ssize_t value)
{
if (emitComp->opts.disAsm)
if (emitComp->opts.disAsm && emitComp->opts.disDiffable)
{
printf("(reloc)");
}
Expand Down
89 changes: 59 additions & 30 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,22 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei

for (UINT32 i = 0; i < fgPgoSchemaCount; i++)
{
if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) &&
((IL_OFFSET)fgPgoSchema[i].ILOffset == offset))
if ((IL_OFFSET)fgPgoSchema[i].ILOffset != offset)
{
continue;
}

if (fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount)
{
*weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset);
return true;
}

if (fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount)
{
*weightWB = (BasicBlock::weight_t) * (uint64_t*)(fgPgoData + fgPgoSchema[i].Offset);
return true;
}
}

*weightWB = 0;
Expand Down Expand Up @@ -334,9 +344,11 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
ICorJitInfo::PgoInstrumentationSchema schemaElem;
schemaElem.Count = 1;
schemaElem.Other = 0;
schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount;
schemaElem.ILOffset = offset;
schemaElem.Offset = 0;
schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts()
? ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount
: ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount;
schemaElem.ILOffset = offset;
schemaElem.Offset = 0;

schema.push_back(schemaElem);

Expand All @@ -362,21 +374,23 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
//
void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory)
{
const int schemaIndex = (int)block->bbCountSchemaIndex;
const ICorJitInfo::PgoInstrumentationSchema& entry = schema[block->bbCountSchemaIndex];

assert(block->bbCodeOffs == (IL_OFFSET)schema[schemaIndex].ILOffset);
assert(schema[schemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount);
size_t addrOfCurrentExecutionCount = (size_t)(schema[schemaIndex].Offset + profileMemory);
assert(block->bbCodeOffs == (IL_OFFSET)entry.ILOffset);
assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) ||
(entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount));
size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory);

var_types typ =
entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG;
// Read Basic-Block count value
GenTree* valueNode =
m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);
GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);

// Increment value by 1
GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, TYP_INT, valueNode, m_comp->gtNewIconNode(1));
GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, typ, valueNode, m_comp->gtNewIconNode(1, typ));

// Write new Basic-Block count value
GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);
GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);
GenTree* asgNode = m_comp->gtNewAssignNode(lhsNode, rhsNode);

m_comp->fgNewStmtAtBeg(block, asgNode);
Expand Down Expand Up @@ -411,11 +425,12 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile
assert(m_entryBlock != nullptr);
assert(m_entryBlock->bbCodeOffs == 0);

const int firstSchemaIndex = (int)m_entryBlock->bbCountSchemaIndex;
assert((IL_OFFSET)schema[firstSchemaIndex].ILOffset == 0);
assert(schema[firstSchemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount);
const ICorJitInfo::PgoInstrumentationSchema& entry = schema[m_entryBlock->bbCountSchemaIndex];
assert((IL_OFFSET)entry.ILOffset == 0);
assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) ||
(entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount));

const size_t addrOfFirstExecutionCount = (size_t)(schema[firstSchemaIndex].Offset + profileMemory);
const size_t addrOfFirstExecutionCount = (size_t)(entry.Offset + profileMemory);

GenTree* arg;

Expand Down Expand Up @@ -447,13 +462,15 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile
GenTreeCall::Use* args = m_comp->gtNewCallArgs(arg);
GenTree* call = m_comp->gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args);

var_types typ =
entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG;
// Read Basic-Block count value
//
GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false);
GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false);

// Compare Basic-Block count value against zero
//
GenTree* relop = m_comp->gtNewOperNode(GT_NE, TYP_INT, valueNode, m_comp->gtNewIconNode(0, TYP_INT));
GenTree* relop = m_comp->gtNewOperNode(GT_NE, typ, valueNode, m_comp->gtNewIconNode(0, typ));
GenTree* colon = new (m_comp, GT_COLON) GenTreeColon(TYP_VOID, m_comp->gtNewNothingNode(), call);
GenTree* cond = m_comp->gtNewQmarkNode(TYP_VOID, relop, colon);
Statement* stmt = m_comp->gtNewStmt(cond);
Expand Down Expand Up @@ -1041,9 +1058,11 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
ICorJitInfo::PgoInstrumentationSchema schemaElem;
schemaElem.Count = 1;
schemaElem.Other = targetOffset;
schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::EdgeIntCount;
schemaElem.ILOffset = sourceOffset;
schemaElem.Offset = 0;
schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts()
? ICorJitInfo::PgoInstrumentationKind::EdgeLongCount
: ICorJitInfo::PgoInstrumentationKind::EdgeIntCount;
schemaElem.ILOffset = sourceOffset;
schemaElem.Offset = 0;

schema.push_back(schemaElem);

Expand Down Expand Up @@ -1082,9 +1101,12 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem
// Sanity checks.
//
assert((schemaIndex >= 0) && (schemaIndex < (int)schema.size()));
assert(schema[schemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount);

size_t addrOfCurrentExecutionCount = (size_t)(schema[schemaIndex].Offset + profileMemory);
const ICorJitInfo::PgoInstrumentationSchema& entry = schema[schemaIndex];
assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount) ||
(entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount));

size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory);

// Determine where to place the probe.
//
Expand Down Expand Up @@ -1124,16 +1146,17 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem

// Place the probe

var_types typ =
entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount ? TYP_INT : TYP_LONG;
// Read Basic-Block count value
GenTree* valueNode =
m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);
m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);

// Increment value by 1
GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, TYP_INT, valueNode, m_comp->gtNewIconNode(1));
GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, typ, valueNode, m_comp->gtNewIconNode(1, typ));

// Write new Basic-Block count value
GenTree* lhsNode =
m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);
GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false);
GenTree* asgNode = m_comp->gtNewAssignNode(lhsNode, rhsNode);

m_comp->fgNewStmtAtBeg(instrumentedBlock, asgNode);
Expand Down Expand Up @@ -1725,10 +1748,12 @@ PhaseStatus Compiler::fgIncorporateProfileData()
break;

case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount:
case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount:
fgPgoBlockCounts++;
break;

case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount:
case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount:
fgPgoEdgeCounts++;
break;

Expand Down Expand Up @@ -2157,12 +2182,16 @@ void EfficientEdgeCountReconstructor::Prepare()
switch (schemaEntry.InstrumentationKind)
{
case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount:
case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount:
{
// Optimization TODO: if profileCount is zero, we can just ignore this edge
// and the right things will happen.
//
uint32_t const profileCount = *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset);
BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount;
uint64_t const profileCount =
schemaEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount
? *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset)
: *(uint64_t*)(m_comp->fgPgoData + schemaEntry.Offset);
BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount;

m_allWeightsZero &= (profileCount == 0);

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15422,7 +15422,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
if (allocSize <= maxSize)
{
const unsigned stackallocAsLocal = lvaGrabTemp(false DEBUGARG("stackallocLocal"));
JITDUMP("Converting stackalloc of %lld bytes to new local V%02u\n", allocSize,
JITDUMP("Converting stackalloc of %zd bytes to new local V%02u\n", allocSize,
stackallocAsLocal);
lvaTable[stackallocAsLocal].lvType = TYP_BLK;
lvaTable[stackallocAsLocal].lvExactSize = (unsigned)allocSize;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ CONFIG_INTEGER(JitMinimalJitProfiling, W("JitMinimalJitProfiling"), 1)
CONFIG_INTEGER(JitMinimalPrejitProfiling, W("JitMinimalPrejitProfiling"), 0)
CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 1)
CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 1)
CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values.

// Profile consumption options
CONFIG_INTEGER(JitDisablePgo, W("JitDisablePgo"), 0) // Ignore pgo data for all methods
Expand Down
Loading