Skip to content

Commit

Permalink
Merge pull request #18793 from dsouzai/prof_inl_fixes
Browse files Browse the repository at this point in the history
Fix Inlining Bugs
  • Loading branch information
vijaysun-omr authored Jan 24, 2024
2 parents 8e696cc + b065415 commit e4997aa
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 72 deletions.
94 changes: 51 additions & 43 deletions runtime/compiler/optimizer/InlinerTempForJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3354,7 +3354,7 @@ void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_

for (int32_t k = 0; k < callsite->numTargets(); k++)
{
uint32_t size = 0;
int32_t size = 0;

TR_EstimateCodeSize::raiiWrapper ecsWrapper(this, tracer(), _maxRecursiveCallByteCodeSizeEstimate);
TR_EstimateCodeSize *ecs = ecsWrapper.getCodeEstimator();
Expand Down Expand Up @@ -3452,7 +3452,7 @@ void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_
heuristicTrace(tracer(),"WeighCallSite: For Target %p node %p signature %s, estimation returned a size of %d",
calltarget,calltarget->_myCallSite->_callNode,tracer()->traceSignature(calltarget),size);

if (currentBlockHasExceptionSuccessors && ecs->aggressivelyInlineThrows())
if (currentBlockHasExceptionSuccessors && ecs->aggressivelyInlineThrows() && size > 0)
{
_maxRecursiveCallByteCodeSizeEstimate >>= 3;
size >>= 3;
Expand All @@ -3469,7 +3469,7 @@ void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_
(size > _maxRecursiveCallByteCodeSizeEstimate/2))
possiblyVeryHotLargeCallee = true;

if (calltarget->_calleeSymbol->isSynchronised())
if (calltarget->_calleeSymbol->isSynchronised() && size > 0)
{
size >>= 1; // could help gvp
heuristicTrace(tracer(),"Setting size to %d because call is Synchronized",size);
Expand All @@ -3481,13 +3481,13 @@ void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_
wouldBenefitFromInlining = true;
}

if (strstr(calltarget->_calleeSymbol->signature(trMemory()),"BigDecimal.add("))
if (strstr(calltarget->_calleeSymbol->signature(trMemory()),"BigDecimal.add(") && size > 0)
{
size >>=2;
heuristicTrace(tracer(),"Setting size to %d because call is BigDecimal.add",size);
}

if (isHot(comp()))
if (isHot(comp()) && size > 0)
{
TR_ResolvedMethod *m = calltarget->_calleeSymbol->getResolvedMethod();
const char *sig = "toString";
Expand Down Expand Up @@ -3587,53 +3587,61 @@ void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(),"WeighCallSite: Considering shrinking call %p with frequency %d\n", callsite->_callNode, frequency2);

bool largeCompiledCallee = !comp()->getOption(TR_InlineVeryLargeCompiledMethods) &&
isLargeCompiledMethod(calltarget->_calleeMethod, size, frequency2);
if (largeCompiledCallee)
{
size = size*TR::Options::_inlinerVeryLargeCompiledMethodAdjustFactor;
}
else if (frequency2 > borderFrequency)
if (size > 0)
{
float factor = (float)(maxFrequency-frequency2)/(float)maxFrequency;
factor = std::max(factor, 0.4f);
bool largeCompiledCallee = !comp()->getOption(TR_InlineVeryLargeCompiledMethods) &&
isLargeCompiledMethod(calltarget->_calleeMethod, size, frequency2);
if (largeCompiledCallee)
{
size = size*TR::Options::_inlinerVeryLargeCompiledMethodAdjustFactor;
}
else if (frequency2 > borderFrequency)
{
float factor = (float)(maxFrequency-frequency2)/(float)maxFrequency;
factor = std::max(factor, 0.4f);

float avgMethodSize = (float)size/(float)ecs->getNumOfEstimatedCalls();
float numCallsFactor = (float)(avgMethodSize)/110.0f;
float avgMethodSize = (float)size/(float)ecs->getNumOfEstimatedCalls();
float numCallsFactor = (float)(avgMethodSize)/110.0f;

numCallsFactor = std::max(factor, 0.1f);
numCallsFactor = std::max(factor, 0.1f);

if (size > 100)
{
size = (int)((float)size * factor * numCallsFactor);
if (size < 100) size = 100;
if (size > 100)
{
size = (int)((float)size * factor * numCallsFactor);
if (size < 100) size = 100;
}
else
{
size = (int)((float)size * factor * numCallsFactor);
}
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(), "WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
}
else
else if ((frequency2 > 0) && (frequency2 < veryColdBorderFrequency)) // luke-warm block
{
size = (int)((float)size * factor * numCallsFactor);
float factor = (float)frequency2 / (float)maxFrequency;
//factor = std::max(factor, 0.1f);
size = (int)((float)size / (factor*factor)); // make the size look bigger to inline less
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(), "WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
}
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(), "WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
}
else if ((frequency2 > 0) && (frequency2 < veryColdBorderFrequency)) // luke-warm block
{
float factor = (float)frequency2 / (float)maxFrequency;
//factor = std::max(factor, 0.1f);
size = (int)((float)size / (factor*factor)); // make the size look bigger to inline less
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(), "WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
}
else if ((frequency2 >= 0) && (frequency2 < coldBorderFrequency)) // very cold block
{
//to avoid division by zero crash. Semantically freqs of 0 and 1 should be pretty close given maxFrequency of 10K
int adjFrequency2 = frequency2 ? frequency2 : 1;
float factor = (float)adjFrequency2 / (float)maxFrequency;
//factor = std::max(factor, 0.1f);
size = (int)((float)size / factor);
else if ((frequency2 >= 0) && (frequency2 < coldBorderFrequency)) // very cold block
{
//to avoid division by zero crash. Semantically freqs of 0 and 1 should be pretty close given maxFrequency of 10K
int adjFrequency2 = frequency2 ? frequency2 : 1;
float factor = (float)adjFrequency2 / (float)maxFrequency;
//factor = std::max(factor, 0.1f);
size = (int)((float)size / factor);


if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(),"WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(),"WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
}
else
{
if (comp()->trace(OMR::inlining))
heuristicTrace(tracer(),"WeighCallSite: Not adjusted call-graph size for call node %p, size %d\n", callsite->_callNode, origSize);
}
}
else
{
Expand Down
62 changes: 33 additions & 29 deletions runtime/compiler/runtime/J9ValueProfiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ template <class T> class List;
template <class T> class ListElement;

/**
* Clang and old versions of XLC cannot determine uintptr_t is
* Clang and old versions of XLC cannot determine uintptr_t is
* equivalent to uint32_t or uint64_t.
*/
#if defined(TR_HOST_POWER) || defined(TR_HOST_S390) || defined(OSX)
Expand Down Expand Up @@ -164,7 +164,7 @@ struct TR_ByteInfo
union TR_BigDecimalInfo
{
uint64_t value;
struct
struct
{
int32_t flag;
int32_t scale;
Expand Down Expand Up @@ -233,7 +233,7 @@ class TR_AbstractProfilerInfo : public TR_Link0<TR_AbstractProfilerInfo>

/**
* Hash table abstract profiler
*
*
* Provides common methods for generating the instrumentation
*/
class TR_AbstractHashTableProfilerInfo : public TR_AbstractProfilerInfo
Expand Down Expand Up @@ -268,7 +268,7 @@ class TR_AbstractHashTableProfilerInfo : public TR_AbstractProfilerInfo
* Table layout
*/
uintptr_t getBaseAddress() { return (uintptr_t) this; }
virtual size_t getLockOffset() = 0;
virtual size_t getLockOffset() = 0;
virtual size_t getHashOffset() = 0;
virtual size_t getKeysOffset() = 0;
virtual size_t getFreqOffset() = 0;
Expand Down Expand Up @@ -327,7 +327,7 @@ class TR_AbstractHashTableProfilerInfo : public TR_AbstractProfilerInfo

/**
* Hash table implementation of value profiling
*
*
* Instrumentation will generate a hash and check for a match in jitted code.
* Collisions are managed by the runtime helper.
*/
Expand Down Expand Up @@ -462,10 +462,10 @@ TR_HashTableProfilerInfo<T>::getTotalFrequency()
if (freqs[i] > 0 && i != getOtherIndex())
totalFrequency += freqs[i];
}

unlock();

return totalFrequency;
return totalFrequency;
}

/**
Expand Down Expand Up @@ -553,7 +553,7 @@ TR_HashTableProfilerInfo<T>::getMaxValue(T &value)

/**
* Stash the values and frequencies into a vector.
*
*
* \param vec Vector to fill. Will be cleared.
* \return Reference to vector.
*/
Expand Down Expand Up @@ -727,7 +727,7 @@ TR_EmbeddedHashTable<T, bits>::addKey(T value)
{
available = available > 0 ? available : i;
continue;
}
}

if (value == _keys[i])
break;
Expand Down Expand Up @@ -756,7 +756,7 @@ TR_EmbeddedHashTable<T, bits>::addKey(T value)

// Store the first value
_keys[newIndex] = value;
_freqs[newIndex] = 1;
_freqs[newIndex] = 1;
this->_hashConfig = hashConfig;
}
else
Expand Down Expand Up @@ -787,7 +787,7 @@ TR_EmbeddedHashTable<T, bits>::addKey(T value)

// Unlock the table, leaving JIT access disabled if at capacity
this->_metaData.full = populated >= this->getCapacity();
this->unlock(populated < this->getCapacity());
this->unlock(populated < this->getCapacity());

if (dumpInfo)
{
Expand Down Expand Up @@ -845,15 +845,15 @@ TR_HashTableProfilerInfo<T>::initialHashConfig(HashFunction &hash, T value)
}

// Otherwise, place in final slot
size_t firstOne = getHashType() == BitIndexHash ? 0 : 8;
size_t firstOne = getHashType() == BitIndexHash ? 0 : 8;
for (size_t i = 0; i < getBits(); ++i)
{
if (getHashType() == BitIndexHash)
hash.indices[i] = firstOne;
else
hash.shifts[i] = firstOne - i;
}

return (1 << getBits()) - 1;
}

Expand All @@ -879,7 +879,7 @@ TR_HashTableProfilerInfo<T>::updateHashConfig(HashFunction &hash, T mask)
for (; mask && bit < getBits(); ++bit)
{
// Shift mask until a set bit is found
while (!(mask & 1))
while (!(mask & 1))
{
++index;
mask >>= 1;
Expand Down Expand Up @@ -937,7 +937,7 @@ TR_EmbeddedHashTable<T, bits>::recursivelySplit(T mask, T choices)
return mask;

// Try to preserve the existing order by selecting a bit that is zero in key0, one in key1
T diff = ~key0 & key1;
T diff = ~key0 & key1;

// Couldn't find a suitable choice, grab any difference
if (!diff)
Expand Down Expand Up @@ -965,7 +965,7 @@ TR_HashTableProfilerInfo<T>::applyHash(HashFunction &hash, T value)
{
size_t result = 0;
if (getHashType() == BitMaskHash)
{
{
size_t i = 1;
T work = hash.mask;
while (work)
Expand Down Expand Up @@ -1286,7 +1286,7 @@ TR_LinkedListProfilerInfo<T>::getMaxValue(T &value)

/**
* Stash the values and frequencies into a vector.
*
*
* \param vec Vector to fill. Will be cleared.
* \return Reference to vector.
*/
Expand All @@ -1299,7 +1299,7 @@ TR_LinkedListProfilerInfo<T>::getList(TR::vector<TR_ProfiledValue<T>, TR::Region
vec.resize(getNumProfiledValues());
size_t i = 0;
for (auto iter = getFirst(); iter; iter = iter->getNext())
{
{
if (iter->_frequency > 0)
{
vec[i]._value = iter->_value;
Expand Down Expand Up @@ -1328,7 +1328,7 @@ TR_LinkedListProfilerInfo<T>::dumpInfo(TR::FILE *logFile)
size_t count = 0;
size_t padding = 2 * sizeof(T) + 2;
for (auto iter = getFirst(); iter; iter = iter->getNext())
trfprintf(logFile, " %d: %d %0*x", count++, iter->_frequency, padding, iter->_value);
trfprintf(logFile, " %d: %d %0*x", count++, iter->_frequency, padding, iter->_value);

trfprintf(logFile, " Num: %d Total Frequency: %d\n", count, getTotalFrequency());
}
Expand All @@ -1343,15 +1343,15 @@ template <typename T> uint32_t
TR_LinkedListProfilerInfo<T>::getTotalFrequency(uintptr_t **addrOfTotalFrequency)
{
OMR::CriticalSection lock(vpMonitor);

uintptr_t *addr = NULL;
for (auto cursor = getFirst(); cursor; cursor = cursor->getNext())
addr = &cursor->_totalFrequency;

if (addrOfTotalFrequency)
*addrOfTotalFrequency = addr;
return (uint32_t) *addr;
}
return (uint32_t) *addr;
}

/**
* Add a value to the list if there is room and its not already present.
Expand All @@ -1374,6 +1374,7 @@ TR_LinkedListProfilerInfo<T>::incrementOrCreate(T &value, uintptr_t **addrOfTota
totalFrequency = getTotalFrequency(addrOfTotalFrequency);

int32_t numDistinctValuesProfiled = 0;
Element *lastCursorExtraInfo = getFirst();
Element *cursorExtraInfo = getFirst()->getNext();
while (cursorExtraInfo)
{
Expand All @@ -1389,11 +1390,14 @@ TR_LinkedListProfilerInfo<T>::incrementOrCreate(T &value, uintptr_t **addrOfTota
}

numDistinctValuesProfiled++;
lastCursorExtraInfo = cursorExtraInfo;
cursorExtraInfo = cursorExtraInfo->getNext();
}

if (!cursorExtraInfo)
cursorExtraInfo = getFirst();
TR_ASSERT_FATAL(cursorExtraInfo == NULL, "cursorExtraInfo = 0x%p should be NULL at this point!\n", cursorExtraInfo);
TR_ASSERT_FATAL(lastCursorExtraInfo, "lastCursorExtraInfo should not be NULL at this point!\n");

cursorExtraInfo = lastCursorExtraInfo;

maxNumValuesProfiled = std::min<uint32_t>(maxNumValuesProfiled, LINKEDLIST_MAX_NUM_VALUES);
if (numDistinctValuesProfiled <= maxNumValuesProfiled)
Expand Down Expand Up @@ -1531,7 +1535,7 @@ TR_ArrayProfilerInfo<T>::getMaxValue(T &value)

/**
* Stash the values and frequencies into a vector.
*
*
* \param vec Vector to fill. Will be cleared.
* \return Reference to vector.
*/
Expand All @@ -1544,7 +1548,7 @@ TR_ArrayProfilerInfo<T>::getList(TR::vector<TR_ProfiledValue<T>, TR::Region&> &v
vec.resize(getNumProfiledValues());
size_t j = 0;
for (size_t i = 0; i < getSize(); ++i)
{
{
if (_frequencies[i] > 0)
{
vec[j]._value = _values[i];
Expand Down Expand Up @@ -1608,7 +1612,7 @@ TR_ArrayProfilerInfo<T>::incrementOrCreate(const T &value)
break;
}
}

_totalFrequency++;
}

Expand Down Expand Up @@ -1760,15 +1764,15 @@ TR_GenericValueInfo<T>::getMaxValue()
/**
* Stash the values and frequencies into a vector and sort it.
* Sort is ordered by frequency descending.
*
*
* \param region Region to allocate vector in.
* \return Reference to sorted vector.
*/
template <typename T> void
TR_GenericValueInfo<T>::getSortedList(Vector &vec)
{
_profiler->getList(vec);
std::sort(vec.begin(), vec.end(), DescendingSort());
std::sort(vec.begin(), vec.end(), DescendingSort());
}

/**
Expand Down

0 comments on commit e4997aa

Please sign in to comment.