Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
JIT: convert fixed-sized locallocs to locals, enable inlining
Browse files Browse the repository at this point in the history
Optimize fixed sized locallocs of 32 bytes or less to use local buffers,
if the localloc is not in a loop.

Also "optimize" the degenerate 0 byte case.

Allow inline candidates containing localloc, but fail inlining if any
of a candidate's locallocs do not convert to local buffers.

The 32 byte size threshold was arrived at empirically; larger values did
not enable many more cases and started seeinge size bloat because of
larger stack offsets.

We can revise this threshold if we are willing to reorder locals and see
fixed sized cases larger than 32 bytes.

Closes #8542.

Also add missing handler for the callsite is in try region, this was
an oversight.
  • Loading branch information
AndyAyersMS committed Oct 31, 2017
1 parent 9e186c8 commit bb84711
Show file tree
Hide file tree
Showing 13 changed files with 298 additions and 23 deletions.
1 change: 1 addition & 0 deletions src/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1997,6 +1997,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
compLongUsed = false;
compTailCallUsed = false;
compLocallocUsed = false;
compLocallocOptimized = false;
compQmarkRationalized = false;
compQmarkUsed = false;
compFloatingPointUsed = false;
Expand Down
3 changes: 3 additions & 0 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7917,6 +7917,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
bool compFloatingPointUsed; // Does the method use TYP_FLOAT or TYP_DOUBLE
bool compTailCallUsed; // Does the method do a tailcall
bool compLocallocUsed; // Does the method use localloc.
bool compLocallocOptimized; // Does the method have an optimized localloc
bool compQmarkUsed; // Does the method use GT_QMARK/GT_COLON
bool compQmarkRationalized; // Is it allowed to use a GT_QMARK/GT_COLON node.
bool compUnsafeCastUsed; // Does the method use LDIND/STIND to cast between scalar/refernce types
Expand Down Expand Up @@ -9295,6 +9296,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#define DEFAULT_MAX_INLINE_DEPTH 20 // Methods at more than this level deep will not be inlined

#define DEFAULT_MAX_LOCALLOC_TO_LOCAL_SIZE 32 // fixed locallocs of this size or smaller will convert to local buffers

private:
#ifdef FEATURE_JIT_METHOD_PERF
JitTimer* pCompJitTimer; // Timer data structure (by phases) for current compilation.
Expand Down
34 changes: 26 additions & 8 deletions src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4333,6 +4333,18 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);

// Determine if call site is within a try.
if (isInlining && impInlineInfo->iciBlock->hasTryIndex())
{
compInlineResult->Note(InlineObservation::CALLSITE_IN_TRY_REGION);
}

// Determine if the call site is in a loop.
if (isInlining && ((impInlineInfo->iciBlock->bbFlags & BBF_BACKWARD_JUMP) != 0))
{
compInlineResult->Note(InlineObservation::CALLSITE_IN_LOOP);
}

#ifdef DEBUG

// If inlining, this method should still be a candidate.
Expand Down Expand Up @@ -4807,8 +4819,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
// the list of other opcodes (for all platforms).

__fallthrough;

case CEE_LOCALLOC:
case CEE_MKREFANY:
case CEE_RETHROW:
if (makeInlineObservations)
Expand All @@ -4826,6 +4836,19 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
}
break;

case CEE_LOCALLOC:

// We now allow localloc callees to become candidates in some cases.
if (makeInlineObservations)
{
compInlineResult->Note(InlineObservation::CALLEE_HAS_LOCALLOC);
if (isInlining && compInlineResult->IsFailure())
{
return;
}
}
break;

case CEE_LDARG_0:
case CEE_LDARG_1:
case CEE_LDARG_2:
Expand Down Expand Up @@ -4915,12 +4938,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
}
}

// Determine if call site is within a try.
if (isInlining && impInlineInfo->iciBlock->hasTryIndex())
{
compInlineResult->Note(InlineObservation::CALLSITE_IN_TRY_REGION);
}

// If the inline is viable and discretionary, do the
// profitability screening.
if (compInlineResult->IsDiscretionaryCandidate())
Expand Down Expand Up @@ -22958,6 +22975,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
compLongUsed |= InlineeCompiler->compLongUsed;
compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed;
compLocallocUsed |= InlineeCompiler->compLocallocUsed;
compLocallocOptimized |= InlineeCompiler->compLocallocOptimized;
compQmarkUsed |= InlineeCompiler->compQmarkUsed;
compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed;
compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
Expand Down
83 changes: 73 additions & 10 deletions src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14279,8 +14279,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
break;

case CEE_LOCALLOC:
assert(!compIsForInlining());

if (tiVerificationNeeded)
{
Verify(false, "bad opcode");
Expand All @@ -14292,11 +14290,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
BADCODE("Localloc can't be inside handler");
}

/* The FP register may not be back to the original value at the end
of the method, even if the frame size is 0, as localloc may
have modified it. So we will HAVE to reset it */

compLocallocUsed = true;
setNeedsGSSecurityCookie();

// Get the size to allocate
Expand All @@ -14309,11 +14302,81 @@ void Compiler::impImportBlockCode(BasicBlock* block)
BADCODE("Localloc can only be used when the stack is empty");
}

op1 = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, op2);
// If the localloc is not in a loop and its size is a small constant,
// create a new local var of TYP_BLK and return its address.
{
bool convertedToLocal = false;

// Need to aggressively fold here, as even fixed-size locallocs
// will have casts in the way.
op2 = gtFoldExpr(op2);

if (op2->IsIntegralConst())
{
const ssize_t allocSize = op2->AsIntCon()->IconValue();

if (allocSize == 0)
{
// Result is nullptr
JITDUMP("Converting stackalloc of 0 bytes to push null unmanaged pointer\n");
op1 = gtNewIconNode(0, TYP_I_IMPL);
convertedToLocal = true;
}
else if ((allocSize > 0) && ((compCurBB->bbFlags & BBF_BACKWARD_JUMP) == 0))
{
// Get the size threshold for local conversion
ssize_t maxSize = DEFAULT_MAX_LOCALLOC_TO_LOCAL_SIZE;

// May throw a stack overflow exception. Obviously, we don't want locallocs to be CSE'd.
#ifdef DEBUG
// Optionally allow this to be modified
maxSize = JitConfig.JitStackAllocToLocalSize();
#endif // DEBUG

op1->gtFlags |= (GTF_EXCEPT | GTF_DONT_CSE);
if (allocSize <= maxSize)
{
const unsigned stackallocAsLocal = lvaGrabTemp(false DEBUGARG("stackallocLocal"));
JITDUMP("Converting stackalloc of %lld bytes to new local V%02u\n", allocSize,
stackallocAsLocal);
lvaTable[stackallocAsLocal].lvType = TYP_BLK;
lvaTable[stackallocAsLocal].lvExactSize = (unsigned)allocSize;
lvaTable[stackallocAsLocal].lvIsUnsafeBuffer = true;
op1 = gtNewLclvNode(stackallocAsLocal, TYP_BLK);
op1 = gtNewOperNode(GT_ADDR, TYP_I_IMPL, op1);
convertedToLocal = true;
compGSReorderStackLayout = true;
}
}
}

if (!convertedToLocal)
{
// Bail out if inlining and the localloc was not converted.
//
// Note we might consider allowing the inline, if the call
// site is not in a loop.
if (compIsForInlining())
{
InlineObservation obs = op2->IsIntegralConst()
? InlineObservation::CALLEE_LOCALLOC_TOO_LARGE
: InlineObservation::CALLSITE_LOCALLOC_SIZE_UNKNOWN;
compInlineResult->NoteFatal(obs);
return;
}

op1 = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, op2);
// May throw a stack overflow exception. Obviously, we don't want locallocs to be CSE'd.
op1->gtFlags |= (GTF_EXCEPT | GTF_DONT_CSE);

/* The FP register may not be back to the original value at the end
of the method, even if the frame size is 0, as localloc may
have modified it. So we will HAVE to reset it */
compLocallocUsed = true;
}
else
{
compLocallocOptimized = true;
}
}

impPushOnStack(op1, tiRetVal);
break;
Expand Down
7 changes: 6 additions & 1 deletion src/jit/inline.def
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ INLINE_OBSERVATION(IS_SYNCHRONIZED, bool, "is synchronized",
INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLEE)
INLINE_OBSERVATION(LACKS_RETURN, bool, "no return opcode", FATAL, CALLEE)
INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLEE)
INLINE_OBSERVATION(LOCALLOC_TOO_LARGE, bool, "localloc size too large", FATAL, CALLEE)
INLINE_OBSERVATION(LOG_REPLAY_REJECT, bool, "rejected by log replay", FATAL, CALLEE)
INLINE_OBSERVATION(MARKED_AS_SKIPPED, bool, "skipped by complus request", FATAL, CALLEE)
INLINE_OBSERVATION(MAXSTACK_TOO_BIG, bool, "maxstack too big" , FATAL, CALLEE)
Expand Down Expand Up @@ -78,6 +79,7 @@ INLINE_OBSERVATION(CLASS_PROMOTABLE, bool, "promotable value class",
INLINE_OBSERVATION(DOES_NOT_RETURN, bool, "does not return", INFORMATION, CALLEE)
INLINE_OBSERVATION(END_OPCODE_SCAN, bool, "done looking at opcodes", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_GC_STRUCT, bool, "has gc field in struct local", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_LOCALLOC, bool, "has localloc", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_SIMD, bool, "has SIMD arg, local, or ret", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE)
Expand Down Expand Up @@ -143,6 +145,8 @@ INLINE_OBSERVATION(IS_WITHIN_FILTER, bool, "within filter region",
INLINE_OBSERVATION(LDARGA_NOT_LOCAL_VAR, bool, "ldarga not on local var", FATAL, CALLSITE)
INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLSITE)
INLINE_OBSERVATION(LDVIRTFN_ON_NON_VIRTUAL, bool, "ldvirtfn on non-virtual", FATAL, CALLSITE)
INLINE_OBSERVATION(LOCALLOC_IN_LOOP, bool, "within loop, has localloc", FATAL, CALLSITE)
INLINE_OBSERVATION(LOCALLOC_SIZE_UNKNOWN, bool, "localloc size unknown", FATAL, CALLSITE)
INLINE_OBSERVATION(LOG_REPLAY_REJECT, bool, "rejected by log replay", FATAL, CALLSITE)
INLINE_OBSERVATION(NOT_CANDIDATE, bool, "not inline candidate", FATAL, CALLSITE)
INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLSITE)
Expand All @@ -164,7 +168,8 @@ INLINE_OBSERVATION(RARE_GC_STRUCT, bool, "rarely called, has gc str
INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE)
INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE)
INLINE_OBSERVATION(FREQUENCY, int, "rough call site frequency", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IN_TRY_REGION, bool, "call site in try region", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IN_LOOP, bool, "call site is in a loop", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IN_TRY_REGION, bool, "call site is in a try region", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IS_SAME_THIS, bool, "same this as root caller", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLSITE)
Expand Down
16 changes: 15 additions & 1 deletion src/jit/inlinepolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,20 @@ void LegacyPolicy::NoteBool(InlineObservation obs, bool value)
}

case InlineObservation::CALLEE_HAS_PINNED_LOCALS:
case InlineObservation::CALLEE_HAS_LOCALLOC:
// The legacy policy is to never inline methods with
// pinned locals.
// pinned locals or localloc.
SetNever(obs);
break;

case InlineObservation::CALLSITE_IN_TRY_REGION:
m_CallsiteIsInTryRegion = true;
break;

case InlineObservation::CALLSITE_IN_LOOP:
m_CallsiteIsInLoop = true;
break;

default:
// Ignore the remainder for now
break;
Expand Down Expand Up @@ -900,6 +909,11 @@ void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value)
}
break;

case InlineObservation::CALLEE_HAS_LOCALLOC:
// We see this during the IL prescan. Ignore for now, we will
// bail out, if necessary, during importation
break;

default:
// Pass all other information to the legacy policy
LegacyPolicy::NoteBool(obs, value);
Expand Down
6 changes: 4 additions & 2 deletions src/jit/inlinepolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class LegacyPolicy : public LegalPolicy
, m_LooksLikeWrapperMethod(false)
, m_MethodIsMostlyLoadStore(false)
, m_CallsiteIsInTryRegion(false)
, m_CallsiteIsInLoop(false)
{
// empty
}
Expand Down Expand Up @@ -167,10 +168,11 @@ class LegacyPolicy : public LegalPolicy
bool m_LooksLikeWrapperMethod : 1;
bool m_MethodIsMostlyLoadStore : 1;
bool m_CallsiteIsInTryRegion : 1;
bool m_CallsiteIsInLoop : 1;
};

// EnhancedLegacyPolicy extends the legacy policy by rejecting
// inlining of methods that never return because they throw.
// EnhancedLegacyPolicy extends the legacy policy by
// relaxing various restrictions.

class EnhancedLegacyPolicy : public LegacyPolicy
{
Expand Down
1 change: 1 addition & 0 deletions src/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ CONFIG_INTEGER(JitPrintInlinedMethods, W("JitPrintInlinedMethods"), 0)
CONFIG_INTEGER(JitPrintDevirtualizedMethods, W("JitPrintDevirtualizedMethods"), 0)
CONFIG_INTEGER(JitRequired, W("JITRequired"), -1)
CONFIG_INTEGER(JitRoundFloat, W("JITRoundFloat"), DEFAULT_ROUND_LEVEL)
CONFIG_INTEGER(JitStackAllocToLocalSize, W("JitStackAllocToLocalSize"), DEFAULT_MAX_LOCALLOC_TO_LOCAL_SIZE)
CONFIG_INTEGER(JitSkipArrayBoundCheck, W("JitSkipArrayBoundCheck"), 0)
CONFIG_INTEGER(JitSlowDebugChecksEnabled, W("JitSlowDebugChecksEnabled"), 1) // Turn on slow debug checks
CONFIG_INTEGER(JitSplitFunctionSize, W("JitSplitFunctionSize"), 0) // On ARM, use this as the maximum function/funclet
Expand Down
2 changes: 1 addition & 1 deletion src/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8272,7 +8272,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
{
szFailReason = "Needs security check";
}
else if (compLocallocUsed)
else if (compLocallocUsed || compLocallocOptimized)
{
szFailReason = "Localloc used";
}
Expand Down
45 changes: 45 additions & 0 deletions tests/src/JIT/opt/LocAlloc/localloc.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// Conversion of small fixed localloc to locals
// and inlining of localloc callees

using System;

class L
{
unsafe static int Use4()
{
byte* i = stackalloc byte[4];
i[2] = 50;
return i[2] * 2;
}

unsafe static int Use(int x)
{
byte* i = stackalloc byte[x];
i[1] = 50;
return i[1] * 2;
}

public static int Main()
{
int v0 = Use4();
int v1 = Use(10);
int v2 = Use(100);
int v3 = Use(v0);
int v4 = 0;
int v5 = 0;
int v6 = 0;

for (int i = 0; i < 7; i++)
{
v5 += Use4();
v5 += Use(4);
v6 += Use(v0);
}

return v0 + v1 + v2 + v3 + v4 + v5 + v6 - 2400;
}
}
40 changes: 40 additions & 0 deletions tests/src/JIT/opt/LocAlloc/localloc.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<AssemblyName>$(MSBuildProjectName)</AssemblyName>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
<ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT .0\UITestExtensionPackages</ReferencePath>
<SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
<NuGetPackageImportStamp>7a9bfb7d</NuGetPackageImportStamp>
<CLRTestPriority>0</CLRTestPriority>
</PropertyGroup>
<!-- Default configurations to help VS understand the configurations -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "></PropertyGroup>
<ItemGroup>
<CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
<Visible>False</Visible>
</CodeAnalysisDependentAssemblyPaths>
</ItemGroup>
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="localloc.cs" />
</ItemGroup>
<ItemGroup>
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
</ItemGroup>
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
<PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
</Project>
Loading

0 comments on commit bb84711

Please sign in to comment.