Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Merge pull request #8849 from seanshpark/fixstackalign
Browse files Browse the repository at this point in the history
[x86/Linux] Stack align 16 bytes for JIT code
  • Loading branch information
BruceForstall authored Feb 7, 2017
2 parents 3108cc7 + 5df4528 commit b05cf50
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 44 deletions.
15 changes: 10 additions & 5 deletions src/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3165,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
/* Check our max stack level. Needed for fgAddCodeRef().
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
noway_assert(getEmitter()->emitMaxStackDepth <=
(compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
{
unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
#if defined(UNIX_X86_ABI)
maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding
#endif
noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
}
#endif // EMIT_TRACK_STACK_DEPTH

*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
Expand Down
43 changes: 30 additions & 13 deletions src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2645,16 +2645,14 @@ void CodeGen::genLclHeap(GenTreePtr tree)
// Loop:
genDefineTempLabel(loop);

#if defined(_TARGET_AMD64_)
// Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
inst_IV(INS_push_hide, 0); // --- push 8-byte 0
inst_IV(INS_push_hide, 0); // --- push 8-byte 0
#elif defined(_TARGET_X86_)
// Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
inst_IV(INS_push_hide, 0); // --- push 4-byte 0
#endif // _TARGET_X86_
static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);

for (unsigned i = 0; i < count; i++)
{
inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0
}
// Note that the stack must always be aligned to STACK_ALIGN bytes

// Decrement the loop counter and loop if not done.
inst_RV(INS_dec, regCnt, TYP_I_IMPL);
Expand Down Expand Up @@ -4894,9 +4892,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
stackArgBytes += argBytes;
}
else
{
#endif // FEATURE_PUT_STRUCT_ARG_STK

{
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
}
Expand Down Expand Up @@ -5135,6 +5133,15 @@ void CodeGen::genCallInstruction(GenTreePtr node)
retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
}

#if defined(UNIX_X86_ABI)
// Put back the stack pointer if there was any padding for stack alignment
unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign();
if (padStackAlign != 0)
{
inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
#endif // UNIX_X86_ABI

// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
{
Expand Down Expand Up @@ -7753,6 +7760,16 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)

#ifdef _TARGET_X86_

#if defined(UNIX_X86_ABI)
// For each call, the first stack argument carries the padding needed for alignment.
// If this value is not zero, use it to adjust ESP before the argument is pushed.
unsigned argPadding = putArgStk->getArgPadding();
if (argPadding != 0)
{
inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
#endif

if (varTypeIsStruct(targetType))
{
(void)genAdjustStackForPutArgStk(putArgStk);
Expand Down Expand Up @@ -8070,7 +8087,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
slotAttr = EA_BYREF;
}

const unsigned offset = i * 4;
const unsigned offset = i * TARGET_POINTER_SIZE;
if (srcAddrInReg)
{
getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
Expand All @@ -8079,7 +8096,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
genStackLevel += 4;
genStackLevel += TARGET_POINTER_SIZE;
}
#else // !defined(_TARGET_X86_)

Expand Down
19 changes: 19 additions & 0 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,11 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
#if defined(UNIX_X86_ABI)
unsigned padStkAlign; // Number of padding slots for stack alignment. For each call, only the first
// argument may have a nonzero value, which causes "sub esp, n" to be emitted to adjust
// the stack before the argument is pushed.
#endif

bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
Expand Down Expand Up @@ -1263,6 +1268,10 @@ class fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
#if defined(UNIX_X86_ABI)
unsigned padStkAlign; // Number of padding slots added for stack alignment. This value is used to restore
// the stack pointer after each call, undoing the alignment adjustment
#endif

unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
bool hasRegArgs; // true if we have one or more register arguments
Expand Down Expand Up @@ -1312,6 +1321,10 @@ class fgArgInfo

void ArgsComplete();

#if defined(UNIX_X86_ABI)
void ArgsAlignPadding();
#endif

void SortArgs();

void EvalArgsToTemps();
Expand All @@ -1331,6 +1344,12 @@ class fgArgInfo
{
return nextSlotNum;
}
#if defined(UNIX_X86_ABI)
// Returns padStkAlign: the number of padding slots (not bytes) recorded for
// stack alignment on this call.
unsigned GetPadStackAlign()
{
return padStkAlign;
}
#endif
bool HasRegArgs()
{
return hasRegArgs;
Expand Down
27 changes: 27 additions & 0 deletions src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4546,6 +4546,9 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
struct GenTreePutArgStk : public GenTreeUnOp
{
unsigned gtSlotNum; // Slot number of the argument to be passed on stack
#if defined(UNIX_X86_ABI)
unsigned gtPadAlign; // Number of padding slots for stack alignment
#endif

#if FEATURE_FASTTAILCALL
bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
Expand All @@ -4561,6 +4564,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
Expand All @@ -4582,6 +4588,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
Expand All @@ -4603,6 +4612,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
Expand All @@ -4622,6 +4634,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
Expand All @@ -4640,6 +4655,18 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtSlotNum * TARGET_POINTER_SIZE;
}

#if defined(UNIX_X86_ABI)
// Returns gtPadAlign: the number of padding slots (not bytes) for stack
// alignment associated with this stack argument.
unsigned getArgPadding()
{
return gtPadAlign;
}

// Stores the number of padding slots for stack alignment in gtPadAlign.
// padAlign is a slot count, not a byte count.
void setArgPadding(unsigned padAlign)
{
gtPadAlign = padAlign;
}
#endif

#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
{
Expand Down
25 changes: 25 additions & 0 deletions src/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5773,6 +5773,7 @@ void Compiler::lvaAlignFrame()

#elif defined(_TARGET_X86_)

#if DOUBLE_ALIGN
if (genDoubleAlign())
{
// Double Frame Alignment for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
Expand All @@ -5783,6 +5784,30 @@ void Compiler::lvaAlignFrame()
lvaIncrementFrameSize(sizeof(void*));
}
}
#endif

if (STACK_ALIGN > REGSIZE_BYTES)
{
if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
{
// If we are not doing final layout, we don't know the exact value of compLclFrameSize
// and thus do not know how much we will need to add in order to be aligned.
// We add the maximum pad that we could ever have (which is 12)
lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
}

// Align the stack with STACK_ALIGN value.
int adjustFrameSize = compLclFrameSize;
#if defined(UNIX_X86_ABI)
// we need to consider spilled register(s) plus return address and/or EBP
int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
#endif
if ((adjustFrameSize % STACK_ALIGN) != 0)
{
lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
}
}

#else
NYI("TARGET specific lvaAlignFrame");
Expand Down
5 changes: 5 additions & 0 deletions src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif

#if defined(UNIX_X86_ABI)
assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0));
putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
#endif

#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
Expand Down
Loading

0 comments on commit b05cf50

Please sign in to comment.