From 3fc8b099443a8b84bd158b8e49e38be6dd941180 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 8 Feb 2022 18:20:12 +0100 Subject: [PATCH] Add JIT support for control-flow guard on x64 and arm64 (#63763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for generating control-flow guard checks. The support is enabled by a JIT flag or by setting COMPlus_JitForceControlFlowGuard=1. Co-authored-by: Michal Strehovský --- docs/design/coreclr/botr/clr-abi.md | 40 +++ .../templates/runtimes/run-test-job.yml | 11 +- eng/pipelines/coreclr/jit-cfg.yml | 52 +++ src/coreclr/inc/corinfo.h | 3 + src/coreclr/inc/corjitflags.h | 2 +- src/coreclr/inc/jiteeversionguid.h | 11 +- src/coreclr/inc/jithelpers.h | 8 + src/coreclr/jit/codegenarmarch.cpp | 31 +- src/coreclr/jit/codegencommon.cpp | 8 + src/coreclr/jit/codegenxarch.cpp | 36 +- src/coreclr/jit/compiler.cpp | 14 + src/coreclr/jit/compiler.h | 43 ++- src/coreclr/jit/emit.cpp | 6 + src/coreclr/jit/flowgraph.cpp | 2 +- src/coreclr/jit/gentree.h | 48 +++ src/coreclr/jit/jitconfigvalues.h | 8 + src/coreclr/jit/jitee.h | 3 +- src/coreclr/jit/lower.cpp | 338 ++++++++++++++++-- src/coreclr/jit/lower.h | 4 + src/coreclr/jit/lsrabuild.cpp | 40 +-- src/coreclr/jit/morph.cpp | 43 ++- src/coreclr/jit/targetamd64.h | 4 + src/coreclr/jit/targetarm.h | 3 + src/coreclr/jit/targetarm64.h | 4 + src/coreclr/jit/targetx86.h | 3 + .../Common/JitInterface/CorInfoHelpFunc.cs | 8 + .../tools/Common/JitInterface/CorInfoTypes.cs | 2 +- .../DependencyAnalysis/ExternSymbolNode.cs | 12 +- .../DependencyAnalysis/NodeFactory.cs | 11 + .../DependencyAnalysis/ObjectWriter.cs | 2 +- .../Compiler/RyuJitCompilationBuilder.cs | 3 + .../JitInterface/CorInfoImpl.RyuJit.cs | 5 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 13 + src/coreclr/vm/amd64/jithelpers_fast.S | 10 + src/coreclr/vm/arm64/asmhelpers.S | 8 + src/coreclr/vm/arm64/asmhelpers.asm | 8 + src/coreclr/vm/jithelpers.cpp | 4 + 
src/tests/Common/testenvironment.proj | 6 + 38 files changed, 747 insertions(+), 110 deletions(-) create mode 100644 eng/pipelines/coreclr/jit-cfg.yml diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index f7b01352de9224..65717698e421c4 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -752,3 +752,43 @@ The return value is handled as follows: 4. All other cases require the use of a return buffer, through which the value is returned. In addition, there is a guarantee that if a return buffer is used a value is stored there only upon ordinary exit from the method. The buffer is not allowed to be used for temporary storage within the method and its contents will be unaltered if an exception occurs while executing the method. + +# Control Flow Guard (CFG) support on Windows + +Control Flow Guard (CFG) is a security mitigation available in Windows. +When CFG is enabled, the operating system maintains data structures that can be used to verify whether an address is to be considered a valid indirect call target. +This mechanism is exposed through two different helper functions, each with different characteristics. + +The first mechanism is a validator that takes the target address as an argument and fails fast if the address is not an expected indirect call target; otherwise, it does nothing and returns. +The second mechanism is a dispatcher that takes the target address in a non-standard register; on successful validation of the address, it jumps directly to the target function. +Windows makes the dispatcher available only on ARM64 and x64, while the validator is available on all platforms. +However, the JIT supports CFG only on ARM64 and x64, with CFG by default being disabled for these platforms. +The expected use of the CFG feature is for NativeAOT scenarios that are running in constrained environments where CFG is required. 
+ +The helpers are exposed to the JIT as standard JIT helpers `CORINFO_HELP_VALIDATE_INDIRECT_CALL` and `CORINFO_HELP_DISPATCH_INDIRECT_CALL`. + +To use the validator the JIT expands indirect calls into a call to the validator followed by a call to the validated address. +For the dispatcher the JIT will transform calls to pass the target along but otherwise set up the call as normal. + +Note that "indirect call" here refers to any call that is not to an immediate (in the instruction stream) address. +For example, even direct calls may emit indirect call instructions in JIT codegen due to e.g. tiering or if they have not been compiled yet; these are expanded with the CFG mechanism as well. + +The next sections describe the calling convention that the JIT expects from these helpers. + +## CFG details for ARM64 + +On ARM64, `CORINFO_HELP_VALIDATE_INDIRECT_CALL` takes the call address in `x15`. +In addition to the usual registers it preserves all float registers, `x0`-`x8` and `x15`. + +`CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `x9`. +The JIT does not use the dispatch helper by default due to worse branch predictor performance. +Therefore it will expand all indirect calls via the validation helper and a manual call. + +## CFG details for x64 + +On x64, `CORINFO_HELP_VALIDATE_INDIRECT_CALL` takes the call address in `rcx`. +In addition to the usual registers it also preserves all float registers and `rcx` and `r10`; furthermore, shadow stack space is not required to be allocated. + +`CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. +The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweigh the less accurate branch prediction. +However, note that the use of `r11` in the dispatcher makes it incompatible with VSD calls where the JIT must fall back to the validator and a manual call. 
diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index f944ad47ba2037..602a35c4ec6c8f 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -197,6 +197,9 @@ jobs: - ${{ if in(parameters.testGroup, 'pgo') }}: - name: timeoutPerTestCollectionInMinutes value: 120 + - ${{ if in(parameters.testGroup, 'jit-cfg') }}: + - name: timeoutPerTestCollectionInMinutes + value: 120 - ${{ if eq(parameters.compositeBuildMode, true) }}: - name: crossgenArg @@ -210,7 +213,7 @@ jobs: # TODO: update these numbers as they were determined long ago ${{ if eq(parameters.testGroup, 'innerloop') }}: timeoutInMinutes: 200 - ${{ if in(parameters.testGroup, 'outerloop', 'jit-experimental', 'pgo') }}: + ${{ if in(parameters.testGroup, 'outerloop', 'jit-experimental', 'pgo', 'jit-cfg') }}: timeoutInMinutes: 270 ${{ if in(parameters.testGroup, 'gc-longrunning', 'gc-simulator') }}: timeoutInMinutes: 480 @@ -551,6 +554,12 @@ jobs: - jitpartialcompilation_osr - jitpartialcompilation_osr_pgo - jitobjectstackallocation + ${{ if in(parameters.testGroup, 'jit-cfg') }}: + scenarios: + - jitcfg + - jitcfg_dispatcher_always + - jitcfg_dispatcher_never + - jitcfg_gcstress0xc ${{ if in(parameters.testGroup, 'ilasm') }}: scenarios: - ilasmroundtrip diff --git a/eng/pipelines/coreclr/jit-cfg.yml b/eng/pipelines/coreclr/jit-cfg.yml new file mode 100644 index 00000000000000..6b30445c2363c0 --- /dev/null +++ b/eng/pipelines/coreclr/jit-cfg.yml @@ -0,0 +1,52 @@ +trigger: none + +schedules: +- cron: "0 22 * * 0,6" + displayName: Sun at 2:00 PM (UTC-8:00) + branches: + include: + - main + always: true + +jobs: + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml + buildConfig: checked + platforms: + - OSX_arm64 + - OSX_x64 + - Linux_arm64 + - Linux_x64 + - windows_arm64 
+ - windows_x64 + - CoreClrTestBuildHost # Either OSX_x64 or Linux_x64 + jobParameters: + testGroup: jit-cfg + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml + buildConfig: checked + platforms: + - CoreClrTestBuildHost # Either OSX_x64 or Linux_x64 + jobParameters: + testGroup: jit-cfg + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml + buildConfig: checked + platforms: + - OSX_arm64 + - OSX_x64 + - Linux_arm64 + - Linux_x64 + - windows_arm64 + - windows_x64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: jit-cfg + liveLibrariesBuildConfig: Release diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 46099e21382b54..4b52f6071eeab0 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -614,6 +614,9 @@ enum CorInfoHelpFunc CORINFO_HELP_CLASSPROFILE64, // Update 64-bit class profile for a call site CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. 
+ CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer + CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer + CORINFO_HELP_COUNT, }; diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h index c749e876d2e0fa..bf4f990c7aa198 100644 --- a/src/coreclr/inc/corjitflags.h +++ b/src/coreclr/inc/corjitflags.h @@ -32,7 +32,7 @@ class CORJIT_FLAGS CORJIT_FLAG_DEBUG_EnC = 3, // We are in Edit-n-Continue mode CORJIT_FLAG_DEBUG_INFO = 4, // generate line and local-var info CORJIT_FLAG_MIN_OPT = 5, // disable all jit optimizations (not necesarily debuggable code) - CORJIT_FLAG_UNUSED1 = 6, + CORJIT_FLAG_ENABLE_CFG = 6, // generate control-flow guard checks CORJIT_FLAG_MCJIT_BACKGROUND = 7, // Calling from multicore JIT background thread, do not call JitComplete #if defined(TARGET_X86) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 66de16f77a0ca5..395532388fa036 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,13 +43,14 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* ccb0c159-04b3-47f6-993e-79114c9cbef8 */ - 0xccb0c159, - 0x04b3, - 0x47f6, - {0x99, 0x3e, 0x79, 0x11, 0x4c, 0x9c, 0xbe, 0xf8} +constexpr GUID JITEEVersionIdentifier = { /* 63009f0c-662a-485b-bac1-ff67be6c7f9d */ + 0x63009f0c, + 0x662a, + 0x485b, + {0xba, 0xc1, 0xff, 0x67, 0xbe, 0x6c, 0x7f, 0x9d} }; + ////////////////////////////////////////////////////////////////////////////////////////////////////////// // // END JITEEVersionIdentifier diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index b9a1e3941da389..82b43a7ab01f56 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -351,6 +351,14 @@ JITHELPER(CORINFO_HELP_CLASSPROFILE64, JIT_ClassProfile64, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, 
JIT_PartialCompilationPatchpoint, CORINFO_HELP_SIG_REG_ONLY) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) + JITHELPER(CORINFO_HELP_VALIDATE_INDIRECT_CALL, JIT_ValidateIndirectCall, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_DISPATCH_INDIRECT_CALL, JIT_DispatchIndirectCall, CORINFO_HELP_SIG_REG_ONLY) +#else + JITHELPER(CORINFO_HELP_VALIDATE_INDIRECT_CALL, NULL, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_DISPATCH_INDIRECT_CALL, NULL, CORINFO_HELP_SIG_REG_ONLY) +#endif + #undef JITHELPER #undef DYNAMICJITHELPER #undef JITHELPER diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 24c57d110b7bab..610f66fb0db276 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3134,9 +3134,6 @@ void CodeGen::genCall(GenTreeCall* call) // into a volatile register that won't be restored by epilog sequence. if (call->IsFastTailCall()) { - // Don't support fast tail calling JIT helpers - assert(call->gtCallType != CT_HELPER); - GenTree* target = getCallTarget(call, nullptr); if (target != nullptr) @@ -3177,22 +3174,28 @@ void CodeGen::genCall(GenTreeCall* call) genCallInstruction(call); - // if it was a pinvoke we may have needed to get the address of a label - if (genPendingCallLabel) + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) { genDefineInlineTempLabel(genPendingCallLabel); genPendingCallLabel = nullptr; } - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? 
- // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; +#ifdef DEBUG + // Killed registers should no longer contain any GC pointers. + regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif var_types returnType = call->TypeGet(); if (returnType != TYP_VOID) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 44e4e84f2dac2b..81e2dddff69e82 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -661,6 +661,9 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_INIT_PINVOKE_FRAME: return RBM_INIT_PINVOKE_FRAME_TRASH; + case CORINFO_HELP_VALIDATE_INDIRECT_CALL: + return RBM_VALIDATE_INDIRECT_CALL_TRASH; + default: return RBM_CALLEE_TRASH; } @@ -2204,6 +2207,11 @@ void CodeGen::genGenerateMachineCode() compiler->fgPgoInlineePgo, compiler->fgPgoInlineeNoPgoSingleBlock, compiler->fgPgoInlineeNoPgo); } + if (compiler->opts.IsCFGEnabled()) + { + printf("; control-flow guard enabled\n"); + } + if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALT_JIT)) { printf("; invoked as altjit\n"); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index cfc97dc6b1370c..6c4c34542e4b46 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5218,9 +5218,6 @@ void CodeGen::genCall(GenTreeCall* call) // that won't be restored by epilog 
sequence. if (call->IsFastTailCall()) { - // Don't support fast tail calling JIT helpers - assert(call->gtCallType != CT_HELPER); - GenTree* target = getCallTarget(call, nullptr); if (target != nullptr) { @@ -5272,22 +5269,28 @@ void CodeGen::genCall(GenTreeCall* call) genCallInstruction(call X86_ARG(stackArgBytes)); - // if it was a pinvoke or intrinsic we may have needed to get the address of a label - if (genPendingCallLabel) + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) { genDefineInlineTempLabel(genPendingCallLabel); genPendingCallLabel = nullptr; } - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? - // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH. - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; +#ifdef DEBUG + // Killed registers should no longer contain any GC pointers. 
+ regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif var_types returnType = call->TypeGet(); if (returnType != TYP_VOID) @@ -5563,6 +5566,11 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA #endif if (target->isContainedIndir()) { + // When CFG is enabled we should not be emitting any non-register indirect calls. + assert(!compiler->opts.IsCFGEnabled() || + call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL) || + call->IsHelperCall(compiler, CORINFO_HELP_DISPATCH_INDIRECT_CALL)); + if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()) { // Note that if gtControlExpr is an indir of an absolute address, we mark it as diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index dfa605d137d337..851ed82248f5f8 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -8848,6 +8848,20 @@ void dTreeLIR(GenTree* tree) cTreeLIR(JitTls::GetCompiler(), tree); } +void dTreeRange(GenTree* first, GenTree* last) +{ + Compiler* comp = JitTls::GetCompiler(); + GenTree* cur = first; + while (true) + { + cTreeLIR(comp, cur); + if (cur == last) + break; + + cur = cur->gtNext; + } +} + void dTrees() { cTrees(JitTls::GetCompiler()); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4be262df8552e3..c63edea0d327f1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1743,6 +1743,7 @@ struct fgArgTabEntry case NonStandardArgKind::ShiftLow: case NonStandardArgKind::ShiftHigh: case NonStandardArgKind::FixedRetBuffer: + case NonStandardArgKind::ValidateIndirectCallTarget: return false; case NonStandardArgKind::WrapperDelegateCell: case 
NonStandardArgKind::VirtualStubCell: @@ -9604,7 +9605,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX compMinOptsIsUsed = true; return compMinOpts; } - bool IsMinOptsSet() + bool IsMinOptsSet() const { return compMinOptsIsSet; } @@ -9613,7 +9614,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { return compMinOpts; } - bool IsMinOptsSet() + bool IsMinOptsSet() const { return compMinOptsIsSet; } @@ -9637,23 +9638,55 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX } // true if the CLFLG_* for an optimization is set. - bool OptEnabled(unsigned optFlag) + bool OptEnabled(unsigned optFlag) const { return !!(compFlags & optFlag); } #ifdef FEATURE_READYTORUN - bool IsReadyToRun() + bool IsReadyToRun() const { return jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN); } #else - bool IsReadyToRun() + bool IsReadyToRun() const { return false; } #endif + // Check if the compilation is control-flow guard enabled. + bool IsCFGEnabled() const + { +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) + // On these platforms we assume the register that the target is + // passed in is preserved by the validator and take care to get the + // target from the register for the call (even in debug mode). + static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & (1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == 0); + if (JitConfig.JitForceControlFlowGuard()) + return true; + + return jitFlags->IsSet(JitFlags::JIT_FLAG_ENABLE_CFG); +#else + // The remaining platforms are not supported and would require some + // work to support. + // + // ARM32: + // The ARM32 validator does not preserve any volatile registers + // which means we have to take special care to allocate and use a + // callee-saved register (reloading the target from memory is a + // security issue). 
+ // + // x86: + // On x86 some VSD calls disassemble the call site and expect an + // indirect call which is fundamentally incompatible with CFG. + // This would require a different way to pass this information + // through. + // + return false; +#endif + } + #ifdef FEATURE_ON_STACK_REPLACEMENT bool IsOSR() const { diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 9f0a2cf6a897e2..4cabb028b115b0 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -2507,6 +2507,8 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc) case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR: case CORINFO_HELP_INIT_PINVOKE_FRAME: + + case CORINFO_HELP_VALIDATE_INDIRECT_CALL: return true; default: @@ -9323,6 +9325,10 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) break; #endif // defined(TARGET_X86) + case CORINFO_HELP_VALIDATE_INDIRECT_CALL: + result = RBM_VALIDATE_INDIRECT_CALL_TRASH; + break; + default: result = RBM_CALLEE_TRASH_NOGC; break; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index e2786ce57d820d..2311857bab7d73 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2996,7 +2996,7 @@ void Compiler::fgSimpleLowering() GenTreeCall* call = tree->AsCall(); // Fast tail calls use the caller-supplied scratch // space so have no impact on this method's outgoing arg size. 
- if (!call->IsFastTailCall()) + if (!call->IsFastTailCall() && !call->IsHelperCall(this, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) { // Update outgoing arg size to handle this call const unsigned thisCallOutAreaSize = call->fgArgInfo->GetOutArgSize(); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b023c2bb8c52d4..a6facdf4c196d0 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1286,6 +1286,11 @@ struct GenTree return OperIsPutArgStk() || OperIsPutArgReg() || OperIsPutArgSplit(); } + bool OperIsFieldList() const + { + return OperIs(GT_FIELD_LIST); + } + bool OperIsMultiRegOp() const { #if !defined(TARGET_64BIT) @@ -3979,6 +3984,7 @@ enum class NonStandardArgKind : unsigned FixedRetBuffer, VirtualStubCell, R2RIndirectionCell, + ValidateIndirectCallTarget, // If changing this enum also change getNonStandardArgKindName and isNonStandardArgAddedLate in fgArgInfo }; @@ -3987,6 +3993,12 @@ enum class NonStandardArgKind : unsigned const char* getNonStandardArgKindName(NonStandardArgKind kind); #endif +enum class CFGCallKind +{ + ValidateAndCall, + Dispatch, +}; + struct GenTreeCall final : public GenTree { class Use @@ -4689,6 +4701,42 @@ struct GenTreeCall final : public GenTree return NonStandardArgKind::None; } + CFGCallKind GetCFGCallKind() + { +#if defined(TARGET_AMD64) + // On x64 the dispatcher is more performant, but we cannot use it when + // we need to pass indirection cells as those go into registers that + // are clobbered by the dispatch helper. + bool mayUseDispatcher = GetIndirectionCellArgKind() == NonStandardArgKind::None; + bool shouldUseDispatcher = true; +#elif defined(TARGET_ARM64) + bool mayUseDispatcher = true; + // Branch predictors on ARM64 generally do not handle the dispatcher as + // well as on x64 hardware, so only use the validator by default. + bool shouldUseDispatcher = false; +#else + // Other platforms do not even support the dispatcher. 
+ bool mayUseDispatcher = false; + bool shouldUseDispatcher = false; +#endif + +#ifdef DEBUG + switch (JitConfig.JitCFGUseDispatcher()) + { + case 0: + shouldUseDispatcher = false; + break; + case 1: + shouldUseDispatcher = true; + break; + default: + break; + } +#endif + + return mayUseDispatcher && shouldUseDispatcher ? CFGCallKind::Dispatch : CFGCallKind::ValidateAndCall; + } + void ResetArgInfo(); GenTreeCallFlags gtCallMoreFlags; // in addition to gtFlags diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b53588a6937217..355bbe1f9f3ee3 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -503,6 +503,14 @@ CONFIG_INTEGER(JitNoteFailedExactDevirtualization, W("JitNoteFailedExactDevirtua CONFIG_INTEGER(JitExpandCallsEarly, W("JitExpandCallsEarly"), 1) // Expand Call targets early (in the global morph // phase) +// Force the generation of CFG checks +CONFIG_INTEGER(JitForceControlFlowGuard, W("JitForceControlFlowGuard"), 0); +// JitCFGUseDispatcher values: +// 0: Never use dispatcher +// 1: Use dispatcher on all platforms that support it +// 2: Default behavior, depends on platform (yes on x64, no on arm64) +CONFIG_INTEGER(JitCFGUseDispatcher, W("JitCFGUseDispatcher"), 2) + #if defined(DEBUG) // JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the // file, certain other JIT config variables will be active. 
If the currently compiled function is not in the file, diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index 27149c824348aa..58eddca01b61c6 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -17,7 +17,7 @@ class JitFlags JIT_FLAG_DEBUG_EnC = 3, // We are in Edit-n-Continue mode JIT_FLAG_DEBUG_INFO = 4, // generate line and local-var info JIT_FLAG_MIN_OPT = 5, // disable all jit optimizations (not necesarily debuggable code) - JIT_FLAG_UNUSED1 = 6, + JIT_FLAG_ENABLE_CFG = 6, // generate CFG enabled code JIT_FLAG_MCJIT_BACKGROUND = 7, // Calling from multicore JIT background thread, do not call JitComplete #if defined(TARGET_X86) @@ -174,6 +174,7 @@ class JitFlags FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_EnC, JIT_FLAG_DEBUG_EnC); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_INFO, JIT_FLAG_DEBUG_INFO); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MIN_OPT, JIT_FLAG_MIN_OPT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ENABLE_CFG, JIT_FLAG_ENABLE_CFG); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MCJIT_BACKGROUND, JIT_FLAG_MCJIT_BACKGROUND); #if defined(TARGET_X86) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 614253e7c89239..ed72fcbd8c92ac 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1640,7 +1640,7 @@ void Lowering::LowerCall(GenTree* node) call->ClearOtherRegs(); LowerArgsForCall(call); - // note that everything generated from this point on runs AFTER the outgoing args are placed + // note that everything generated from this point might run AFTER the outgoing args are placed GenTree* controlExpr = nullptr; bool callWasExpandedEarly = false; @@ -1697,6 +1697,10 @@ void Lowering::LowerCall(GenTree* node) } } + // Indirect calls should always go through GenTreeCall::gtCallAddr and + // should never have a control expression as well. 
+ assert((call->gtCallType != CT_INDIRECT) || (controlExpr == nullptr)); + if (call->IsTailCallViaJitHelper()) { // Either controlExpr or gtCallAddr must contain real call target. @@ -1719,40 +1723,17 @@ void Lowering::LowerCall(GenTree* node) JITDUMP("results of lowering call:\n"); DISPRANGE(controlExprRange); - GenTree* insertionPoint = call; - if (!call->IsTailCallViaJitHelper()) - { - // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist - // - // TODO-LIR: find out what's really required here, as this is currently a tree order - // dependency. - if (call->gtCallType == CT_INDIRECT) - { - bool isClosed = false; - if (call->gtCallCookie != nullptr) - { -#ifdef DEBUG - GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); - assert(isClosed); - assert(call->gtCallCookie->Precedes(firstCallAddrNode)); -#endif // DEBUG - - insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode(); - assert(isClosed); - } - else if (call->gtCallAddr != nullptr) - { - insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); - assert(isClosed); - } - } - } - ContainCheckRange(controlExprRange); - BlockRange().InsertBefore(insertionPoint, std::move(controlExprRange)); + BlockRange().InsertBefore(call, std::move(controlExprRange)); call->gtControlExpr = controlExpr; } + + if (comp->opts.IsCFGEnabled()) + { + LowerCFGCall(call); + } + if (call->IsFastTailCall()) { // Lower fast tail call can introduce new temps to set up args correctly for Callee. @@ -2306,6 +2287,295 @@ GenTree* Lowering::LowerTailCallViaJitHelper(GenTreeCall* call, GenTree* callTar return result; } +//------------------------------------------------------------------------ +// LowerCFGCall: Potentially lower a call to use control-flow guard. This +// expands indirect calls into either a validate+call sequence or to a dispatch +// helper taking the original target in a special register. 
+// +// Arguments: +// call - The call node +// +void Lowering::LowerCFGCall(GenTreeCall* call) +{ + assert(!call->IsHelperCall(comp, CORINFO_HELP_DISPATCH_INDIRECT_CALL)); + if (call->IsHelperCall(comp, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + return; + } + + GenTree* callTarget = call->gtCallType == CT_INDIRECT ? call->gtCallAddr : call->gtControlExpr; + if ((callTarget == nullptr) || callTarget->IsIntegralConst()) + { + // This is a direct call, no CFG check is necessary. + return; + } + + CFGCallKind cfgKind = call->GetCFGCallKind(); + + switch (cfgKind) + { + case CFGCallKind::ValidateAndCall: + { + // To safely apply CFG we need to generate a very specific pattern: + // in particular, it is a safety issue to allow the JIT to reload + // the call target from memory between calling + // CORINFO_HELP_VALIDATE_INDIRECT_CALL and the target. This is + // something that would easily occur in debug codegen if we + // produced high-level IR. Instead we will use a GT_PHYSREG node + // to get the target back from the register that contains the target. + // + // Additionally, the validator does not preserve all arg registers, + // so we have to move all GT_PUTARG_REG nodes that would otherwise + // be trashed ahead. The JIT also has an internal invariant that + // once GT_PUTARG nodes start to appear in LIR, the call is coming + // up. To avoid breaking this invariant we move _all_ GT_PUTARG + // nodes (in particular, GC info reporting relies on this). + // + // To sum up, we end up transforming + // + // ta... = + // tb... = + // tc = callTarget + // GT_CALL tc, ta..., tb... + // + // into + // + // ta... = (without GT_PUTARG_* nodes) + // tb = callTarget + // GT_CALL CORINFO_HELP_VALIDATE_INDIRECT_CALL, tb + // tc = GT_PHYSREG REG_VALIDATE_INDIRECT_CALL_ADDR (preserved by helper) + // td = + // GT_CALL tb, ta..., td.. 
+ // + + GenTree* regNode = PhysReg(REG_VALIDATE_INDIRECT_CALL_ADDR, TYP_I_IMPL); + LIR::Use useOfTar; + bool gotUse = BlockRange().TryGetUse(callTarget, &useOfTar); + assert(gotUse); + useOfTar.ReplaceWith(regNode); + + GenTree* targetPlaceholder = comp->gtNewZeroConNode(callTarget->TypeGet()); + // Add the call to the validator. Use a placeholder for the target while we + // morph, sequence and lower, to avoid redoing that for the actual target. + GenTreeCall::Use* args = comp->gtNewCallArgs(targetPlaceholder); + GenTreeCall* validate = comp->gtNewHelperCallNode(CORINFO_HELP_VALIDATE_INDIRECT_CALL, TYP_VOID, args); + + comp->fgMorphTree(validate); + + LIR::Range validateRange = LIR::SeqTree(comp, validate); + GenTree* validateFirst = validateRange.FirstNode(); + GenTree* validateLast = validateRange.LastNode(); + // Insert the validator with the call target before the late args. + BlockRange().InsertBefore(call, std::move(validateRange)); + + // Swap out the target + gotUse = BlockRange().TryGetUse(targetPlaceholder, &useOfTar); + assert(gotUse); + useOfTar.ReplaceWith(callTarget); + targetPlaceholder->SetUnusedValue(); + + LowerRange(validateFirst, validateLast); + + // Insert the PHYSREG node that we must load right after validation. + BlockRange().InsertAfter(validate, regNode); + LowerNode(regNode); + + // Finally move all GT_PUTARG_* nodes + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* node = use.GetNode(); + if (!node->IsValue()) + { + // Non-value nodes in early args are setup nodes for late args. + continue; + } + + assert(node->OperIsPutArg() || node->OperIsFieldList()); + MoveCFGCallArg(call, node); + } + + for (GenTreeCall::Use& use : call->LateArgs()) + { + GenTree* node = use.GetNode(); + assert(node->OperIsPutArg() || node->OperIsFieldList()); + MoveCFGCallArg(call, node); + } + break; + } + case CFGCallKind::Dispatch: + { +#ifdef REG_DISPATCH_INDIRECT_CALL_ADDR + // Now insert the call target as an extra argument. 
+ // + // First append the early placeholder arg + GenTreeCall::Use** earlySlot = &call->gtCallArgs; + unsigned int index = call->gtCallThisArg != nullptr ? 1 : 0; + while (*earlySlot != nullptr) + { + earlySlot = &(*earlySlot)->NextRef(); + index++; + } + + assert(index == call->fgArgInfo->ArgCount()); + GenTree* placeHolder = comp->gtNewArgPlaceHolderNode(callTarget->TypeGet(), NO_CLASS_HANDLE); + placeHolder->gtFlags |= GTF_LATE_ARG; + *earlySlot = comp->gtNewCallArgs(placeHolder); + + // Append the late actual arg + GenTreeCall::Use** lateSlot = &call->gtCallLateArgs; + unsigned int lateIndex = 0; + while (*lateSlot != nullptr) + { + lateSlot = &(*lateSlot)->NextRef(); + lateIndex++; + } + + *lateSlot = comp->gtNewCallArgs(callTarget); + + // Add an entry into the arg info + regNumber regNum = REG_DISPATCH_INDIRECT_CALL_ADDR; + unsigned numRegs = 1; + unsigned byteSize = TARGET_POINTER_SIZE; + unsigned byteAlignment = TARGET_POINTER_SIZE; + bool isStruct = false; + bool isFloatHfa = false; + bool isVararg = false; + + fgArgTabEntry* entry = + call->fgArgInfo->AddRegArg(index, placeHolder, *earlySlot, regNum, numRegs, byteSize, byteAlignment, + isStruct, isFloatHfa, + isVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(0) + UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); + + entry->lateUse = *lateSlot; + entry->SetLateArgInx(lateIndex); + + // Lower the newly added args now that call is updated + LowerArg(call, &(*earlySlot)->NodeRef()); + LowerArg(call, &(*lateSlot)->NodeRef()); + + // Finally update the call to be a helper call + call->gtCallType = CT_HELPER; + call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_DISPATCH_INDIRECT_CALL); + call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; +#ifdef FEATURE_READYTORUN + call->gtEntryPoint.addr = nullptr; + call->gtEntryPoint.accessType = IAT_VALUE; +#endif + + // Now relower the call target + call->gtControlExpr = LowerDirectCall(call); + + if (call->gtControlExpr != nullptr) + { + 
LIR::Range dispatchControlExprRange = LIR::SeqTree(comp, call->gtControlExpr); + + ContainCheckRange(dispatchControlExprRange); + BlockRange().InsertBefore(call, std::move(dispatchControlExprRange)); + } +#else + assert(!"Unexpected CFGCallKind::Dispatch for platform without dispatcher"); +#endif + break; + } + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// IsInvariantInRange: Check if a node is invariant in the specified range. In +// other words, can 'node' be moved to right before 'endExclusive' without its +// computation changing values? +// +// Arguments: +// node - The node. +// endExclusive - The exclusive end of the range to check invariance for. +// +bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive) +{ + assert(node->Precedes(endExclusive)); + + if (node->IsInvariant()) + { + return true; + } + + if (!node->IsValue()) + { + return false; + } + + if (node->OperIsLocal()) + { + GenTreeLclVarCommon* lcl = node->AsLclVarCommon(); + LclVarDsc* desc = comp->lvaGetDesc(lcl); + if (desc->IsAddressExposed()) + { + return false; + } + + // Currently, non-address exposed locals have the property that their + // use occurs at the user, so no further interference check is + // necessary. + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// MoveCFGCallArg: Given a call that will be CFG transformed using the +// validate+call scheme, and an argument GT_PUTARG_* or GT_FIELD_LIST node, +// move that node right before the call. +// +// Arguments: +// call - The call that is being CFG transformed +// node - The argument node +// +// Remarks: +// We can always move the GT_PUTARG_* node further ahead as the side-effects +// of these nodes are handled by LSRA. 
However, the operands of these nodes +// are not always safe to move further ahead; for invariant operands, we +// move them ahead as well to shorten the lifetime of these values. +// +void Lowering::MoveCFGCallArg(GenTreeCall* call, GenTree* node) +{ + assert(node->OperIsPutArg() || node->OperIsFieldList()); + + if (node->OperIsFieldList()) + { + JITDUMP("Node is a GT_FIELD_LIST; moving all operands\n"); + for (GenTreeFieldList::Use& operand : node->AsFieldList()->Uses()) + { + assert(operand.GetNode()->OperIsPutArg()); + MoveCFGCallArg(call, operand.GetNode()); + } + } + else + { + GenTree* operand = node->AsOp()->gtGetOp1(); + JITDUMP("Checking if we can move operand of GT_PUTARG_* node:\n"); + DISPTREE(operand); + if (((operand->gtFlags & GTF_ALL_EFFECT) == 0) && IsInvariantInRange(operand, call)) + { + JITDUMP("...yes, moving to after validator call\n"); + BlockRange().Remove(operand); + BlockRange().InsertBefore(call, operand); + } + else + { + JITDUMP("...no, operand has side effects or is not invariant\n"); + } + } + + JITDUMP("Moving\n"); + DISPTREE(node); + JITDUMP("\n"); + BlockRange().Remove(node); + BlockRange().InsertBefore(call, node); +} + #ifndef TARGET_64BIT //------------------------------------------------------------------------ // Lowering::DecomposeLongCompare: Decomposes a TYP_LONG compare node. @@ -3693,10 +3963,6 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call) { noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER); - // Don't support tail calling helper methods. - // But we might encounter tail calls dispatched via JIT helper appear as a tail call to helper. - noway_assert(!call->IsTailCall() || call->IsTailCallViaJitHelper() || call->gtCallType == CT_USER_FUNC); - // Non-virtual direct/indirect calls: Work out if the address of the // call is known at JIT time. If not it is either an indirect call // or the address must be accessed via an single/double indirection. 
@@ -4851,7 +5117,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // Skip inserting the indirection node to load the address that is already // computed in the VSD stub arg register as a hidden parameter. Instead during the // codegen, just load the call target from there. - shouldOptimizeVirtualStubCall = true; + shouldOptimizeVirtualStubCall = !comp->opts.IsCFGEnabled(); #endif if (!shouldOptimizeVirtualStubCall) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 186203f20ed991..7f2d0ff999408b 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -119,10 +119,14 @@ class Lowering final : public Phase void LowerBlock(BasicBlock* block); GenTree* LowerNode(GenTree* node); + bool IsInvariantInRange(GenTree* node, GenTree* endExclusive); + // ------------------------------ // Call Lowering // ------------------------------ void LowerCall(GenTree* call); + void LowerCFGCall(GenTreeCall* call); + void MoveCFGCallArg(GenTreeCall* call, GenTree* node); #ifndef TARGET_64BIT GenTree* DecomposeLongCompare(GenTree* cmp); #endif diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index d58f1996091fae..46d13164c77989 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -859,7 +859,7 @@ regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node) // regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) { - regMaskTP killMask = RBM_NONE; + regMaskTP killMask = RBM_CALLEE_TRASH; #ifdef TARGET_X86 if (compiler->compFloatingPointUsed) { @@ -873,38 +873,30 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) } } #endif // TARGET_X86 -#if defined(TARGET_X86) || defined(TARGET_ARM) if (call->IsHelperCall()) { CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); killMask = compiler->compHelperCallKillSet(helpFunc); } - else -#endif // defined(TARGET_X86) || defined(TARGET_ARM) + + // if there is no FP used, we can ignore the FP kills + if 
(!compiler->compFloatingPointUsed) { - // if there is no FP used, we can ignore the FP kills - if (compiler->compFloatingPointUsed) - { - killMask = RBM_CALLEE_TRASH; - } - else - { - killMask = RBM_INT_CALLEE_TRASH; - } + killMask &= ~RBM_FLT_CALLEE_TRASH; + } #ifdef TARGET_ARM - if (call->IsVirtualStub()) - { - killMask |= compiler->virtualStubParamInfo->GetRegMask(); - } + if (call->IsVirtualStub()) + { + killMask |= compiler->virtualStubParamInfo->GetRegMask(); + } #else // !TARGET_ARM - // Verify that the special virtual stub call registers are in the kill mask. - // We don't just add them unconditionally to the killMask because for most architectures - // they are already in the RBM_CALLEE_TRASH set, - // and we don't want to introduce extra checks and calls in this hot function. - assert(!call->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == - compiler->virtualStubParamInfo->GetRegMask())); + // Verify that the special virtual stub call registers are in the kill mask. + // We don't just add them unconditionally to the killMask because for most architectures + // they are already in the RBM_CALLEE_TRASH set, + // and we don't want to introduce extra checks and calls in this hot function. 
+ assert(!call->IsVirtualStub() || + ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == compiler->virtualStubParamInfo->GetRegMask())); #endif // !TARGET_ARM - } return killMask; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 926947968ca993..4a6bb66066bba3 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -590,6 +590,8 @@ const char* getNonStandardArgKindName(NonStandardArgKind kind) return "VirtualStubCell"; case NonStandardArgKind::R2RIndirectionCell: return "R2RIndirectionCell"; + case NonStandardArgKind::ValidateIndirectCallTarget: + return "ValidateIndirectCallTarget"; default: unreached(); } @@ -859,6 +861,14 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->SetByteOffset(0); hasRegArgs = true; + if (argCount >= argTableSize) + { + fgArgTabEntry** oldTable = argTable; + argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argCount + 1]; + memcpy(argTable, oldTable, argCount * sizeof(fgArgTabEntry*)); + argTableSize++; + } + AddArg(curArgTabEntry); return curArgTabEntry; } @@ -1432,6 +1442,29 @@ void fgArgInfo::ArgsComplete() } } + // When CFG is enabled and this is a delegate call or vtable call we must + // compute the call target before all late args. However this will + // effectively null-check 'this', which should happen only after all + // arguments are evaluated. Thus we must evaluate all args with side + // effects to a temp. + if (compiler->opts.IsCFGEnabled() && (callTree->IsVirtualVtable() || callTree->IsDelegateInvoke())) + { + // Always evaluate 'this' to temp. 
+ argTable[0]->needTmp = true; + needsTemps = true; + + for (unsigned curInx = 1; curInx < argCount; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + GenTree* arg = curArgTabEntry->GetNode(); + if ((arg->gtFlags & GTF_ALL_EFFECT) != 0) + { + curArgTabEntry->needTmp = true; + needsTemps = true; + } + } + } + argsComplete = true; } @@ -1882,7 +1915,7 @@ void fgArgInfo::EvalArgsToTemps() if (curArgTabEntry->needTmp) { - if (curArgTabEntry->isTmp == true) + if (curArgTabEntry->isTmp) { // Create a copy of the temp to go into the late argument list defArg = compiler->fgMakeTmpArgNode(curArgTabEntry); @@ -2566,6 +2599,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } #endif + if ((REG_VALIDATE_INDIRECT_CALL_ADDR != REG_ARG_0) && call->IsHelperCall(this, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + assert(call->gtCallArgs != nullptr); + GenTreeCall::Use* args = call->gtCallArgs; + GenTree* tar = args->GetNode(); + nonStandardArgs.Add(tar, REG_VALIDATE_INDIRECT_CALL_ADDR, NonStandardArgKind::ValidateIndirectCallTarget); + } + // Allocate the fgArgInfo for the call node; // call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs); diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 77df2d8547355b..50ced88dcb88a3 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -390,6 +390,10 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_RAX + // What sort of reloc do we use for [disp32] address mode #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_REL32 diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index f0545335231e8f..fd68c912955947 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -240,6 +240,9 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. #define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_CALLEE_TRASH | RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH) + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R0 + #define REG_FPBASE REG_R11 #define RBM_FPBASE RBM_R11 #define STR_FPBASE "r11" diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index a4ef3a96782f72..1901d21cef4fa6 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -251,6 +251,10 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15 + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 776c9deece36ef..ec85659d63cbc8 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -228,6 +228,9 @@ // convention that takes EDI as argument (but doesn't trash it), trashes EAX, and returns ESI. 
#define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_PINVOKE_SCRATCH | RBM_PINVOKE_TCB) + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~RBM_ECX) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_ECX + #define REG_FPBASE REG_EBP #define RBM_FPBASE RBM_EBP #define STR_FPBASE "ebp" diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 6d8a5ec684f523..0e5f836ff21102 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -295,6 +295,14 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame + CORINFO_HELP_PATCHPOINT, // Notify runtime that code has reached a patchpoint + CORINFO_HELP_CLASSPROFILE32, // Update 32-bit class profile for a call site + CORINFO_HELP_CLASSPROFILE64, // Update 64-bit class profile for a call site + CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. 
+ + CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer + CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer + CORINFO_HELP_COUNT, } } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 3e348263103855..6274c55c859968 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1324,7 +1324,7 @@ public enum CorJitFlag : uint CORJIT_FLAG_DEBUG_EnC = 3, // We are in Edit-n-Continue mode CORJIT_FLAG_DEBUG_INFO = 4, // generate line and local-var info CORJIT_FLAG_MIN_OPT = 5, // disable all jit optimizations (not necesarily debuggable code) - CORJIT_FLAG_UNUSED1 = 6, + CORJIT_FLAG_ENABLE_CFG = 6, // generate CFG enabled code CORJIT_FLAG_MCJIT_BACKGROUND = 7, // Calling from multicore JIT background thread, do not call JitComplete CORJIT_FLAG_UNUSED2 = 8, CORJIT_FLAG_UNUSED3 = 9, diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExternSymbolNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExternSymbolNode.cs index 8f0e239fc1fb67..721e983ef56401 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExternSymbolNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExternSymbolNode.cs @@ -14,22 +14,24 @@ namespace ILCompiler.DependencyAnalysis /// Represents a symbol that is defined externally and statically linked to the output obj file. 
/// public class ExternSymbolNode : SortableDependencyNode, ISortableSymbolNode - { - private Utf8String _name; + { + private readonly Utf8String _name; + private readonly bool _isIndirection; - public ExternSymbolNode(Utf8String name) + public ExternSymbolNode(Utf8String name, bool isIndirection = false) { _name = name; + _isIndirection = isIndirection; } - protected override string GetName(NodeFactory factory) => $"ExternSymbol {_name.ToString()}"; + protected override string GetName(NodeFactory factory) => $"ExternSymbol {_name.ToString()}{(_isIndirection ? " (indirected)" : "")}"; public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) { sb.Append(_name); } public int Offset => 0; - public virtual bool RepresentsIndirectionCell => false; + public virtual bool RepresentsIndirectionCell => _isIndirection; public override bool InterestingForDynamicDependencyAnalysis => false; public override bool HasDynamicDependencies => false; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 67469a0f55cc68..24b45f2f1c7ed2 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -245,6 +245,10 @@ private void CreateNodeCaches() { return new ExternSymbolNode(name); }); + _externIndirectSymbols = new NodeCache((string name) => + { + return new ExternSymbolNode(name, isIndirection: true); + }); _pInvokeModuleFixups = new NodeCache((PInvokeModuleData moduleData) => { @@ -741,6 +745,13 @@ public ISortableSymbolNode ExternSymbol(string name) return _externSymbols.GetOrAdd(name); } + private NodeCache _externIndirectSymbols; + + public ISortableSymbolNode ExternIndirectSymbol(string name) + { + return _externIndirectSymbols.GetOrAdd(name); + } + private NodeCache _pInvokeModuleFixups; public 
ISymbolNode PInvokeModuleFixup(PInvokeModuleData moduleData) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs index 5462b8fedc11ba..18e9e2b90221e7 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -795,7 +795,7 @@ public int EmitSymbolReference(ISymbolNode target, int delta, RelocType relocTyp // For now consider all method symbols address taken. // We could restrict this in the future to those that are referenced from // reflection tables, EH tables, were actually address taken in code, or are referenced from vtables. - if ((_options & ObjectWritingOptions.ControlFlowGuard) != 0 && target is IMethodNode) + if ((_options & ObjectWritingOptions.ControlFlowGuard) != 0 && (target is IMethodNode || target is AssemblyStubNode)) { flags |= SymbolRefFlags.AddressTakenFunction; } diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs index bc29c680363f04..9f6f6eb400e33d 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs @@ -115,7 +115,10 @@ public override ICompilation ToCompilation() options |= RyuJitCompilationOptions.MethodBodyFolding; if ((_mitigationOptions & SecurityMitigationOptions.ControlFlowGuardAnnotations) != 0) + { + jitFlagBuilder.Add(CorJitFlag.CORJIT_FLAG_ENABLE_CFG); options |= RyuJitCompilationOptions.ControlFlowGuardAnnotations; + } if (_useDwarf5) options |= RyuJitCompilationOptions.UseDwarf5; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs 
b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
index 4706aaa7f7901c..00c3ff26cf0762 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
@@ -692,6 +692,11 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                     id = ReadyToRunHelper.GetCurrentManagedThreadId;
                     break;

+                case CorInfoHelpFunc.CORINFO_HELP_VALIDATE_INDIRECT_CALL:
+                    return _compilation.NodeFactory.ExternIndirectSymbol("__guard_check_icall_fptr");
+                case CorInfoHelpFunc.CORINFO_HELP_DISPATCH_INDIRECT_CALL:
+                    return _compilation.NodeFactory.ExternIndirectSymbol("__guard_dispatch_icall_fptr");
+
                 default:
                     throw new NotImplementedException(ftnNum.ToString());
             }
diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm
index 219597eb350c2f..0060ff036d3b0a 100644
--- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm
+++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm
@@ -436,4 +436,17 @@ ProbeLoop:
 LEAF_END_MARKED JIT_StackProbe, _TEXT

+LEAF_ENTRY JIT_ValidateIndirectCall, _TEXT
+        ret
+LEAF_END JIT_ValidateIndirectCall, _TEXT
+
+LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT
+ifdef _DEBUG
+        mov r10, 0CDCDCDCDCDCDCDCDh ; The real helper clobbers these registers, so clobber them too in the fake helper
+        mov r11, 0CDCDCDCDCDCDCDCDh
+endif
+        rexw jmp rax
+LEAF_END JIT_DispatchIndirectCall, _TEXT
+
+
         end
diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S
index 8109886d0c9696..42eb50836a526b 100644
--- a/src/coreclr/vm/amd64/jithelpers_fast.S
+++ b/src/coreclr/vm/amd64/jithelpers_fast.S
@@ -447,3 +447,13 @@ LOCAL_LABEL(ProbeLoop):
         ret

 LEAF_END_MARKED JIT_StackProbe, _TEXT
+
+LEAF_ENTRY JIT_ValidateIndirectCall, _TEXT
+        ret
+LEAF_END JIT_ValidateIndirectCall, _TEXT
+
+LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT
+        mov r10, 0xCDCDCDCDCDCDCDCD // The real helper clobbers these registers, so clobber them
too in the fake helper
+        mov r11, 0xCDCDCDCDCDCDCDCD
+        rex64 jmp rax
+LEAF_END JIT_DispatchIndirectCall, _TEXT
diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S
index e7725cd16e63b6..941af8f3d00cae 100644
--- a/src/coreclr/vm/arm64/asmhelpers.S
+++ b/src/coreclr/vm/arm64/asmhelpers.S
@@ -1020,3 +1020,11 @@ NESTED_ENTRY OnCallCountThresholdReachedStub, _TEXT, NoHandler
 NESTED_END OnCallCountThresholdReachedStub, _TEXT

 #endif // FEATURE_TIERED_COMPILATION
+
+LEAF_ENTRY JIT_ValidateIndirectCall, _TEXT
+    ret lr
+LEAF_END JIT_ValidateIndirectCall, _TEXT
+
+LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT
+    br x9
+LEAF_END JIT_DispatchIndirectCall, _TEXT
diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm
index 6bbdef2c2d936b..d73b8f5ff6ff7d 100644
--- a/src/coreclr/vm/arm64/asmhelpers.asm
+++ b/src/coreclr/vm/arm64/asmhelpers.asm
@@ -1435,5 +1435,13 @@ __HelperNakedFuncName SETS "$helper":CC:"Naked"

 #endif ; FEATURE_TIERED_COMPILATION

+    LEAF_ENTRY JIT_ValidateIndirectCall
+        ret lr
+    LEAF_END
+
+    LEAF_ENTRY JIT_DispatchIndirectCall
+        br x9
+    LEAF_END
+
 ; Must be at very end of file
     END
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index cd8ab0933e9229..11b9ff7bf7f5d7 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -5669,6 +5669,10 @@ HCIMPL1_RAW(void, JIT_ReversePInvokeExit, ReversePInvokeFrame* frame)
 }
 HCIMPLEND_RAW

+// These two do take args but have a custom calling convention.
+EXTERN_C void JIT_ValidateIndirectCall(); +EXTERN_C void JIT_DispatchIndirectCall(); + //======================================================================== // // JIT HELPERS INITIALIZATION diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index aae081a1871265..97cfd88fb6ce00 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -63,6 +63,8 @@ COMPlus_JitRandomGuardedDevirtualization; COMPlus_JitRandomEdgeCounts; COMPlus_JitRandomOnStackReplacement; + COMPlus_JitForceControlFlowGuard; + COMPlus_JitCFGUseDispatcher; RunningIlasmRoundTrip @@ -195,6 +197,10 @@ + + + +