
JIT: Make isRemovableJmpCandidate less restrictive on AMD64 (dotnet#95493)

Previously, the emitter-level jump removal optimization would not consider jumps following call instructions on AMD64, as unwind semantics require there to be an instruction between a call and an OS epilogue. This was overly restrictive, as many call-jmp pairs do not precede an OS epilogue, and for those that do, the jump can be replaced with a nop. This change allows jumps following calls to be considered for removal, and inserts a nop if needed.
amanasifkhalid authored Dec 7, 2023
1 parent 0124751 commit 303571a
Showing 4 changed files with 151 additions and 34 deletions.
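
The gist of the new emitter behavior, as a minimal standalone sketch (mock types stand in for the JIT's instrDescJmp and insGroup; MockJmp, MockGroup, and kEpilogFlag are illustrative names only, and the real logic lives in the emitter::emitRemoveJumpToNextInst() hunk below): a jump to the next instruction that follows a call stays removable, and at removal time it is either deleted outright or, when its target group is an OS epilog, shrunk to one byte so a nop is emitted in its place. In emitted-code terms, a call/jmp-to-next pair collapses to a bare call, or to call/nop when the fall-through begins an OS epilog.

#include <cstdint>

// Mock stand-ins for the JIT's insGroup and instrDescJmp (simplified for illustration).
struct MockGroup
{
    uint32_t flags;
};
constexpr uint32_t kEpilogFlag = 0x1; // placeholder for IGF_EPILOG

struct MockJmp
{
    uint32_t codeSize;                // bytes this jump will occupy in the final code
    bool     isRemovableJmpCandidate; // jump targets the next instruction and may be removed
    bool     isAfterCallBeforeEpilog; // the flag this change introduces (AMD64 only)
};

// Condensed version of the decision emitRemoveJumpToNextInst() now makes on AMD64.
void removeJumpToNext(MockJmp& jmp, const MockGroup& targetGroup)
{
    if (!jmp.isRemovableJmpCandidate)
    {
        return; // not eligible; the jump is emitted as usual
    }

    jmp.codeSize = 0; // tentatively remove the jump entirely

    if (jmp.isAfterCallBeforeEpilog)
    {
        if ((targetGroup.flags & kEpilogFlag) != 0)
        {
            jmp.codeSize = 1; // the call immediately precedes an OS epilog: keep one byte for a nop
        }
        else
        {
            jmp.isAfterCallBeforeEpilog = false; // no OS epilog follows, so no nop is needed
        }
    }
}

Previously, codegen refused to even mark such a jump as a removable candidate whenever the last emitted instruction was a call (the block deleted from codegenlinear.cpp below); this change defers that decision to the emitter, which knows whether an OS epilog actually follows.
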
15 changes: 4 additions & 11 deletions src/coreclr/jit/codegenlinear.cpp
@@ -599,11 +599,11 @@ void CodeGen::genCodeForBBlist()
noway_assert(genStackLevel == 0);

#ifdef TARGET_AMD64
bool emitNop = false;
bool emitNopBeforeEHRegion = false;
// On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
// situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
// walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
// The document "X64 and ARM ABIs.docx" has more details. The situations:
// The document "clr-abi.md" has more details. The situations:
// 1. If the call instruction is in a different EH region as the instruction that follows it.
// 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
// be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
@@ -624,7 +624,7 @@ void CodeGen::genCodeForBBlist()
case BBJ_ALWAYS:
// We might skip generating the jump via a peephole optimization.
// If that happens, make sure a NOP is emitted as the last instruction in the block.
emitNop = true;
emitNopBeforeEHRegion = true;
break;

case BBJ_THROW:
@@ -735,7 +735,7 @@ void CodeGen::genCodeForBBlist()
if (block->CanRemoveJumpToNext(compiler))
{
#ifdef TARGET_AMD64
if (emitNop)
if (emitNopBeforeEHRegion)
{
instGen(INS_nop);
}
@@ -750,13 +750,6 @@ void CodeGen::genCodeForBBlist()
bool isRemovableJmpCandidate =
!block->hasAlign() && !compiler->fgInDifferentRegions(block, block->GetJumpDest());

#ifdef TARGET_AMD64
// AMD64 requires an instruction after a call instruction for unwinding
// inside an EH region so if the last instruction generated was a call instruction
// do not allow this jump to be marked for possible later removal.
isRemovableJmpCandidate = isRemovableJmpCandidate && !GetEmitter()->emitIsLastInsCall();
#endif // TARGET_AMD64

inst_JMP(EJ_jmp, block->GetJumpDest(), isRemovableJmpCandidate);
#else
inst_JMP(EJ_jmp, block->GetJumpDest());
26 changes: 25 additions & 1 deletion src/coreclr/jit/emit.cpp
@@ -4630,9 +4630,11 @@ void emitter::emitRemoveJumpToNextInst()
insGroup* jmpGroup = jmp->idjIG;
instrDescJmp* nextJmp = jmp->idjNext;

if (jmp->idInsFmt() == IF_LABEL && emitIsUncondJump(jmp) && jmp->idjIsRemovableJmpCandidate)
if (jmp->idjIsRemovableJmpCandidate)
{
#if DEBUG
assert(jmp->idInsFmt() == IF_LABEL);
assert(emitIsUncondJump(jmp));
assert((jmpGroup->igFlags & IGF_HAS_ALIGN) == 0);
assert((jmpGroup->igNum > previousJumpIgNum) || (previousJumpIgNum == (UNATIVE_OFFSET)-1) ||
((jmpGroup->igNum == previousJumpIgNum) && (jmp->idDebugOnlyInfo()->idNum > previousJumpInsNum)));
@@ -4699,6 +4701,25 @@ void emitter::emitRemoveJumpToNextInst()
UNATIVE_OFFSET codeSize = jmp->idCodeSize();
jmp->idCodeSize(0);

#ifdef TARGET_AMD64
// If the removed jump is after a call and before an OS epilog, it needs to be replaced by a nop
if (jmp->idjIsAfterCallBeforeEpilog)
{
if ((targetGroup->igFlags & IGF_EPILOG) != 0)
{
// This jump will become a nop, so set its size now to ensure below calculations are correct
jmp->idCodeSize(1);
codeSize--;
}
else
{
// We don't need a nop if the removed jump isn't before an OS epilog,
// so zero jmp->idjIsAfterCallBeforeEpilog to avoid emitting a nop
jmp->idjIsAfterCallBeforeEpilog = 0;
}
}
#endif // TARGET_AMD64

jmpGroup->igSize -= (unsigned short)codeSize;
jmpGroup->igFlags |= IGF_UPD_ISZ;

@@ -4707,6 +4728,9 @@
}
else
{
// The jump was not removed; make sure idjIsRemovableJmpCandidate reflects this
jmp->idjIsRemovableJmpCandidate = 0;

// Update the previousJmp
previousJmp = jmp;
#if DEBUG
12 changes: 9 additions & 3 deletions src/coreclr/jit/emit.h
@@ -1871,11 +1871,17 @@ class emitter
// beginning of the function -- of the target instruction of the jump, used to
// determine if this jump needs to be patched.
unsigned idjOffs :
#if defined(TARGET_XARCH)
29;
// indicates that the jump was added at the end of a BBJ_ALWAYS basic block and is
#if defined(TARGET_AMD64)
28;
// Indicates the jump was added at the end of a BBJ_ALWAYS basic block and is
// a candidate for being removed if it jumps to the next instruction
unsigned idjIsRemovableJmpCandidate : 1;
// Indicates the jump follows a call instruction and precedes an OS epilog.
// If this jump is removed, a nop will need to be emitted instead (see clr-abi.md for details).
unsigned idjIsAfterCallBeforeEpilog : 1;
#elif defined(TARGET_X86)
29;
unsigned idjIsRemovableJmpCandidate : 1;
#else
30;
#endif
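
For reference, the bit budget in this hunk stays at 30 bits on every target: a standalone sketch of just these fields (the real instrDescJmp packs additional bits around them, and these struct names are illustrative only).

// Illustration of the idjOffs bit budget after this change; each variant still sums to 30 bits.
struct JmpBitsAmd64 // TARGET_AMD64
{
    unsigned idjOffs : 28;                   // shrunk from 29 to make room for the new flag
    unsigned idjIsRemovableJmpCandidate : 1;
    unsigned idjIsAfterCallBeforeEpilog : 1; // new in this change
};

struct JmpBitsX86 // TARGET_X86
{
    unsigned idjOffs : 29;
    unsigned idjIsRemovableJmpCandidate : 1;
};

struct JmpBitsOther // other targets
{
    unsigned idjOffs : 30;
};

static_assert(sizeof(JmpBitsAmd64) == sizeof(unsigned), "fields still fit in one 32-bit word");
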
132 changes: 113 additions & 19 deletions src/coreclr/jit/emitxarch.cpp
@@ -3493,11 +3493,16 @@ const emitJumpKind emitReverseJumpKinds[] = {
//
/* static */ bool emitter::emitJmpInstHasNoCode(instrDesc* id)
{
bool result = (id->idIns() == INS_jmp) && (id->idCodeSize() == 0);
bool result = (id->idIns() == INS_jmp) && ((instrDescJmp*)id)->idjIsRemovableJmpCandidate;

// A zero size jump instruction can only be the one that is marked
// as removable candidate.
assert(!result || ((instrDescJmp*)id)->idjIsRemovableJmpCandidate);
// A jump marked for removal must have a code size of 0,
// except for jumps that must be replaced by nops on AMD64 (these must have a size of 1)
#ifdef TARGET_AMD64
const bool isNopReplacement = ((instrDescJmp*)id)->idjIsAfterCallBeforeEpilog && (id->idCodeSize() == 1);
assert(!result || (id->idCodeSize() == 0) || isNopReplacement);
#else // !TARGET_AMD64
assert(!result || (id->idCodeSize() == 0));
#endif // !TARGET_AMD64

return result;
}
@@ -9082,6 +9087,11 @@ void emitter::emitIns_J(instruction ins,
int instrCount /* = 0 */,
bool isRemovableJmpCandidate /* = false */)
{
#ifdef TARGET_AMD64
// Check emitter::emitLastIns before it is updated
const bool lastInsIsCall = emitIsLastInsCall();
#endif // TARGET_AMD64

UNATIVE_OFFSET sz;
instrDescJmp* id = emitNewInstrJmp();

@@ -9109,9 +9119,23 @@ }
}
#endif // DEBUG

emitContainsRemovableJmpCandidates |= isRemovableJmpCandidate;
id->idjIsRemovableJmpCandidate = isRemovableJmpCandidate ? 1 : 0;
id->idjShort = 0;
if (isRemovableJmpCandidate)
{
emitContainsRemovableJmpCandidates = true;
id->idjIsRemovableJmpCandidate = 1;
#ifdef TARGET_AMD64
// If this jump is after a call instruction, we might need to insert a nop after it's removed,
// but only if the jump is before an OS epilog.
// We'll check for the OS epilog in emitter::emitRemoveJumpToNextInst().
id->idjIsAfterCallBeforeEpilog = lastInsIsCall ? 1 : 0;
#endif // TARGET_AMD64
}
else
{
id->idjIsRemovableJmpCandidate = 0;
}

id->idjShort = 0;
if (dst != nullptr)
{
/* Assume the jump will be long */
@@ -16054,6 +16078,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
insFormat insFmt = id->idInsFmt();
unsigned char callInstrSize = 0;

// Indicates that a jump located after a call and before an OS epilog was replaced by a nop on AMD64
bool convertedJmpToNop = false;

#ifdef DEBUG
bool dspOffs = emitComp->opts.dspGCtbls;
#endif // DEBUG
@@ -16174,22 +16201,50 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}

case IF_LABEL:
case IF_RWR_LABEL:
case IF_SWR_LABEL:
{
instrDescJmp* jmp = (instrDescJmp*)id;
assert(id->idGCref() == GCT_NONE);
assert(id->idIsBound() || emitJmpInstHasNoCode(id));

// TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
if (id->idCodeSize() != 0)
if (!jmp->idjIsRemovableJmpCandidate)
{
assert(id->idIsBound());
dst = emitOutputLJ(ig, dst, id);
}
#ifdef TARGET_AMD64
else if (jmp->idjIsAfterCallBeforeEpilog)
{
// Need to insert a nop if the removed jump was after a call and before an OS epilog
// (The code size should already be set to 1 for the nop)
assert(id->idCodeSize() == 1);
dst = emitOutputNOP(dst, 1);

// Set convertedJmpToNop in case we need to print this instrDesc as a nop in a disasm
convertedJmpToNop = true;
}
#endif // TARGET_AMD64
else
{
assert(((instrDescJmp*)id)->idjIsRemovableJmpCandidate);
// Jump was removed, and no nop was needed, so id should not have any code
assert(jmp->idjIsRemovableJmpCandidate);
assert(emitJmpInstHasNoCode(id));
}
sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));

sz = sizeof(instrDescJmp);
break;
}
case IF_RWR_LABEL:
case IF_SWR_LABEL:
{
assert(id->idGCref() == GCT_NONE);
assert(id->idIsBound() || emitJmpInstHasNoCode(id));
instrDescJmp* jmp = (instrDescJmp*)id;

// Jump removal optimization is only for IF_LABEL
assert(!jmp->idjIsRemovableJmpCandidate);

// TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
dst = emitOutputLJ(ig, dst, id);
sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
break;
}

@@ -17447,8 +17502,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
break;
}

// Make sure we set the instruction descriptor size correctly
#ifdef TARGET_AMD64
// If a jump is replaced by a nop, its instrDesc is temporarily modified so the nop
// is displayed correctly in disasms. Check for this discrepancy to avoid triggering this assert.
assert(((ins == INS_jmp) && (id->idIns() == INS_nop)) || (sz == emitSizeOfInsDsc(id)));
#else // !TARGET_AMD64
assert(sz == emitSizeOfInsDsc(id));
#endif // !TARGET_AMD64

#if !FEATURE_FIXED_OUT_ARGS
bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
@@ -17505,14 +17566,47 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
assert(*dp != dst || emitInstHasNoCode(id));

#ifdef DEBUG
if ((emitComp->opts.disAsm || emitComp->verbose) && !emitJmpInstHasNoCode(id))
if ((emitComp->opts.disAsm || emitComp->verbose) && (!emitJmpInstHasNoCode(id) || convertedJmpToNop))
{
emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
#ifdef TARGET_AMD64
// convertedJmpToNop indicates this instruction is a removable jump that was replaced by a nop.
// The instrDesc still describes a jump, so in order to print the nop in the disasm correctly,
// set the instruction and format accordingly (and reset them after to avoid triggering asserts).
if (convertedJmpToNop)
{
id->idIns(INS_nop);
id->idInsFmt(IF_NONE);

emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));

id->idIns(ins);
id->idInsFmt(insFmt);
}
else
#endif // TARGET_AMD64
{
emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
}
}
#else
if (emitComp->opts.disAsm && !emitJmpInstHasNoCode(id))
if (emitComp->opts.disAsm && (!emitJmpInstHasNoCode(id) || convertedJmpToNop))
{
emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
#ifdef TARGET_AMD64
if (convertedJmpToNop)
{
id->idIns(INS_nop);
id->idInsFmt(IF_NONE);

emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));

id->idIns(ins);
id->idInsFmt(insFmt);
}
else
#endif // TARGET_AMD64
{
emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
}
}
#endif

