Skip to content

Commit

Permalink
JIT: add ability to pad profile counters and adjust scalable count threshold (
Browse files Browse the repository at this point in the history
#91081)

* `DOTNET_JitCounterPadding`: adds the ability to pad profile counters by some
number of counter-sized slots, to mitigate the effects of false sharing. For
example, with 64-bit counters, setting `DOTNET_JitCounterPadding=8` means each
counter is now on its own cache line (for xarch).

* `DOTNET_TieredPGO_ScalableCountThreshold`: adds the ability to alter the scalable
profile counter's threshold for switching to approximate counting. Defaults to 13,
which means profile counts are exact up to 2^13 = 8192 and approximate above that.
Lower values will reduce the volume of counter updates (also mitigating false sharing
impact) but make the counts more approximate.

Co-authored-by: Jakob Botsch Nielsen <[email protected]>
  • Loading branch information
AndyAyersMS and jakobbotsch authored Aug 27, 2023
1 parent 538cd03 commit f5881e5
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 7 deletions.
4 changes: 4 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,10 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredPGO_InstrumentOnlyHotCode, W("TieredP

// By default, optimizations are used in instrumented tiers only for hot R2R code.
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredPGO_InstrumentedTierAlwaysOptimized, W("TieredPGO_InstrumentedTierAlwaysOptimized"), 0, "Always use optimizations inside instrumented tiers")

// If scalable counters are used, set the log2 threshold at which counting becomes approximate.
// The default of 13 keeps counts exact up to 2^13 = 8192. EEConfig::sync only applies values
// in the range 1..19; anything else leaves the default in place.
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredPGO_ScalableCountThreshold, W("TieredPGO_ScalableCountThreshold"), 13, "Log2 threshold where counting becomes approximate")

#endif

///
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,10 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
{
numCountersPerProbe = 2;
}
else if (JitConfig.JitCounterPadding() > 0)
{
numCountersPerProbe = (unsigned)JitConfig.JitCounterPadding();
}

// Remember the schema index for this block.
//
Expand Down Expand Up @@ -1743,6 +1747,10 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
{
numCountersPerProbe = 2;
}
else if (JitConfig.JitCounterPadding() > 0)
{
numCountersPerProbe = (unsigned)JitConfig.JitCounterPadding();
}

// Walk the bbSparseProbeList, emitting one schema element per...
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ CONFIG_STRING(JitEnablePatchpointRange, W("JitEnablePatchpointRange"))
// Profile instrumentation options
CONFIG_INTEGER(JitInterlockedProfiling, W("JitInterlockedProfiling"), 0)
CONFIG_INTEGER(JitScalableProfiling, W("JitScalableProfiling"), 1)
// When positive, the block and edge count instrumentors use this value as the number of
// counter-sized slots per probe (numCountersPerProbe), so it is the total slot count rather
// than the number of extra slots. E.g. 8 with 64-bit counters spaces counters 64 bytes apart.
CONFIG_INTEGER(JitCounterPadding, W("JitCounterPadding"), 0) // counter-sized slots per probe; 0 = no padding
CONFIG_INTEGER(JitMinimalJitProfiling, W("JitMinimalJitProfiling"), 1)
CONFIG_INTEGER(JitMinimalPrejitProfiling, W("JitMinimalPrejitProfiling"), 0)

Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/vm/eeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ HRESULT EEConfig::Init()
#if defined(FEATURE_PGO)
fTieredPGO = false;
tieredPGO_InstrumentOnlyHotCode = false;
tieredPGO_ScalableCountThreshold = 13;
#endif

#if defined(FEATURE_READYTORUN)
Expand Down Expand Up @@ -782,6 +783,13 @@ HRESULT EEConfig::sync()
fTieredPGO |= CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) != 0;
tieredPGO_InstrumentOnlyHotCode = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredPGO_InstrumentOnlyHotCode) == 1;

DWORD scalableCountThreshold = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredPGO_ScalableCountThreshold);

if ((scalableCountThreshold > 0) && (scalableCountThreshold < 20))
{
tieredPGO_ScalableCountThreshold = scalableCountThreshold;
}

// We need quick jit for TieredPGO
if (!fTieredCompilation_QuickJit)
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/eeconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class EEConfig
#if defined(FEATURE_PGO)
// Returns the fTieredPGO flag.
bool TieredPGO(void) const { LIMITED_METHOD_CONTRACT; return fTieredPGO; }
// Returns the tieredPGO_InstrumentOnlyHotCode flag.
bool TieredPGO_InstrumentOnlyHotCode(void) const { LIMITED_METHOD_CONTRACT; return tieredPGO_InstrumentOnlyHotCode; }
// Returns the log2 count threshold at which the scalable profile counter helpers
// (JIT_CountProfile32/64) switch from exact to approximate counting.
DWORD TieredPGO_ScalableCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredPGO_ScalableCountThreshold; }
#endif

#if defined(FEATURE_READYTORUN)
Expand Down Expand Up @@ -658,6 +659,7 @@ class EEConfig
#if defined(FEATURE_PGO)
bool fTieredPGO;
bool tieredPGO_InstrumentOnlyHotCode;
// Log2 count threshold for approximate profile counting. Initialized to 13 in Init();
// sync() overrides it only when the configured value is in the range 1..19.
DWORD tieredPGO_ScalableCountThreshold;
#endif

#if defined(FEATURE_READYTORUN)
Expand Down
15 changes: 8 additions & 7 deletions src/coreclr/vm/jithelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6077,8 +6077,8 @@ HCIMPLEND

// Helpers for scalable approximate counters
//
// Here 13 means we count accurately up to 2^13 = 8192 and
// then start counting probabialistically.
// Here threshold = 13 means we count accurately up to 2^13 = 8192 and
// then start counting probabilistically.
//
// See docs/design/features/ScalableApproximateCounting.md
//
Expand All @@ -6089,22 +6089,22 @@ HCIMPL1(void, JIT_CountProfile32, volatile LONG* pCounter)

LONG count = *pCounter;
LONG delta = 1;
DWORD threshold = g_pConfig->TieredPGO_ScalableCountThreshold();

if (count > 0)
{
DWORD logCount = 0;
BitScanReverse(&logCount, count);

if (logCount >= 13)
if (logCount >= threshold)
{
delta = 1 << (logCount - 12);
delta = 1 << (logCount - (threshold - 1));
const unsigned rand = HandleHistogramProfileRand();
const bool update = (rand & (delta - 1)) == 0;
if (!update)
{
return;
}

}
}

Expand All @@ -6119,15 +6119,16 @@ HCIMPL1(void, JIT_CountProfile64, volatile LONG64* pCounter)

LONG64 count = *pCounter;
LONG64 delta = 1;
DWORD threshold = g_pConfig->TieredPGO_ScalableCountThreshold();

if (count > 0)
{
DWORD logCount = 0;
BitScanReverse64(&logCount, count);

if (logCount >= 13)
if (logCount >= threshold)
{
delta = 1LL << (logCount - 12);
delta = 1LL << (logCount - (threshold - 1));
const unsigned rand = HandleHistogramProfileRand();
const bool update = (rand & (delta - 1)) == 0;
if (!update)
Expand Down

0 comments on commit f5881e5

Please sign in to comment.