diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.h b/src/coreclr/debug/runtimeinfo/datadescriptor.h index 0e2cdc7a394344..1fc2e06d1a2719 100644 --- a/src/coreclr/debug/runtimeinfo/datadescriptor.h +++ b/src/coreclr/debug/runtimeinfo/datadescriptor.h @@ -109,7 +109,7 @@ CDAC_TYPE_FIELD(Thread, /*uint32*/, Id, cdac_offsets::Id) CDAC_TYPE_FIELD(Thread, /*nuint*/, OSId, cdac_offsets::OSId) CDAC_TYPE_FIELD(Thread, /*uint32*/, State, cdac_offsets::State) CDAC_TYPE_FIELD(Thread, /*uint32*/, PreemptiveGCDisabled, cdac_offsets::PreemptiveGCDisabled) -CDAC_TYPE_FIELD(Thread, /*pointer*/, AllocContext, cdac_offsets::AllocContext) +CDAC_TYPE_FIELD(Thread, /*pointer*/, RuntimeThreadLocals, cdac_offsets::RuntimeThreadLocals) CDAC_TYPE_FIELD(Thread, /*pointer*/, Frame, cdac_offsets::Frame) CDAC_TYPE_FIELD(Thread, /*pointer*/, ExceptionTracker, cdac_offsets::ExceptionTracker) CDAC_TYPE_FIELD(Thread, GCHandle, GCHandle, cdac_offsets::ExposedObject) @@ -130,6 +130,11 @@ CDAC_TYPE_FIELD(ThreadStore, /*int32*/, PendingCount, cdac_offsets: CDAC_TYPE_FIELD(ThreadStore, /*int32*/, DeadCount, cdac_offsets::DeadCount) CDAC_TYPE_END(ThreadStore) +CDAC_TYPE_BEGIN(RuntimeThreadLocals) +CDAC_TYPE_INDETERMINATE(RuntimeThreadLocals) +CDAC_TYPE_FIELD(RuntimeThreadLocals, AllocContext, AllocContext, offsetof(RuntimeThreadLocals, alloc_context)) +CDAC_TYPE_END(RuntimeThreadLocals) + CDAC_TYPE_BEGIN(GCAllocContext) CDAC_TYPE_INDETERMINATE(GCAllocContext) CDAC_TYPE_FIELD(GCAllocContext, /*pointer*/, Pointer, offsetof(gc_alloc_context, alloc_ptr)) diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp index d6f34b10708e53..324e0f86f2aea8 100644 --- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp +++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp @@ -192,12 +192,12 @@ extern "C" void PopulateDebugHeaders() MAKE_SIZE_ENTRY(ThreadStore); MAKE_DEBUG_FIELD_ENTRY(ThreadStore, m_ThreadList); - MAKE_SIZE_ENTRY(ThreadBuffer); - MAKE_DEBUG_FIELD_ENTRY(ThreadBuffer, m_pNext); - MAKE_DEBUG_FIELD_ENTRY(ThreadBuffer, m_rgbAllocContextBuffer); - MAKE_DEBUG_FIELD_ENTRY(ThreadBuffer, m_threadId); - MAKE_DEBUG_FIELD_ENTRY(ThreadBuffer, m_pThreadStressLog); - MAKE_DEBUG_FIELD_ENTRY(ThreadBuffer, m_pExInfoStackHead); + MAKE_SIZE_ENTRY(RuntimeThreadLocals); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pNext); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_rgbAllocContextBuffer); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_threadId); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pThreadStressLog); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pExInfoStackHead); MAKE_SIZE_ENTRY(ExInfo); MAKE_DEBUG_FIELD_ENTRY(ExInfo, m_pPrevExInfo); diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index abe095b994a398..f5a1c82e59697d 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -83,7 +83,7 @@ struct InlinedThreadStaticRoot TypeManager* m_typeManager; }; -struct ThreadBuffer +struct RuntimeThreadLocals { uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; uint32_t volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum @@ -126,7 +126,7 @@ struct ReversePInvokeFrame Thread* m_savedThread; }; -class Thread : private ThreadBuffer +class Thread : private RuntimeThreadLocals { friend class AsmOffsets; friend struct DefaultSListTraits; @@ -158,7 +158,7 @@ class Thread : private ThreadBuffer // For suspension APCs it is mostly harmless, but wasteful and in extreme // cases may force the target thread into stack oveflow. // We use this flag to avoid sending another APC when one is still going through. - // + // // On Unix this is an optimization to not queue up more signals when one is // still being processed. }; diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 63bb947e2baa9b..c46ff78a9bbf39 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -431,13 +431,13 @@ FCIMPL1(void, RhpCancelThreadAbort, void* thread) } FCIMPLEND -C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); +C_ASSERT(sizeof(Thread) == sizeof(RuntimeThreadLocals)); #ifndef _MSC_VER -__thread ThreadBuffer tls_CurrentThread; +__thread RuntimeThreadLocals tls_CurrentThread; #endif -EXTERN_C ThreadBuffer* RhpGetThread() +EXTERN_C RuntimeThreadLocals* RhpGetThread() { return &tls_CurrentThread; } diff --git a/src/coreclr/nativeaot/Runtime/threadstore.inl b/src/coreclr/nativeaot/Runtime/threadstore.inl index 29495046a98272..5b4701249eec4b 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.inl +++ b/src/coreclr/nativeaot/Runtime/threadstore.inl @@ -3,9 +3,9 @@ #ifdef _MSC_VER // a workaround to prevent tls_CurrentThread from becoming dynamically checked/initialized. -EXTERN_C __declspec(selectany) __declspec(thread) ThreadBuffer tls_CurrentThread; +EXTERN_C __declspec(selectany) __declspec(thread) RuntimeThreadLocals tls_CurrentThread; #else -EXTERN_C __thread ThreadBuffer tls_CurrentThread; +EXTERN_C __thread RuntimeThreadLocals tls_CurrentThread; #endif // static diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index ab5e4317ae63d2..cd68323da77007 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -623,7 +623,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/GenericCLRToCOMCallStubs.asm ${ARCH_SOURCES_DIR}/getstate.asm ${ARCH_SOURCES_DIR}/JitHelpers_Fast.asm - ${ARCH_SOURCES_DIR}/JitHelpers_FastMP.asm ${ARCH_SOURCES_DIR}/JitHelpers_FastWriteBarriers.asm ${ARCH_SOURCES_DIR}/JitHelpers_SingleAppDomain.asm ${ARCH_SOURCES_DIR}/JitHelpers_Slow.asm diff --git a/src/coreclr/vm/amd64/AsmMacros.inc b/src/coreclr/vm/amd64/AsmMacros.inc index 8e67aedd9c6dc0..2d14b9c31e8fca 100644 --- a/src/coreclr/vm/amd64/AsmMacros.inc +++ b/src/coreclr/vm/amd64/AsmMacros.inc @@ -206,26 +206,6 @@ INLINE_GETTHREAD macro Reg endm -; -; Inlined macro to get the current thread's allocation context -; Trashes rax and r11 -; - -INLINE_GET_ALLOC_CONTEXT macro Reg - - EXTERN _tls_index: DWORD - EXTERN t_thread_alloc_context: DWORD - - mov r11d, [_tls_index] - mov rax, gs:[OFFSET__TEB__ThreadLocalStoragePointer] - mov rax, [rax + r11 * 8] - mov r11d, SECTIONREL t_thread_alloc_context - add rax, r11 - mov Reg, rax - - endm - - ; if you change this code there will be corresponding code in JITInterfaceGen.cpp which will need to be changed ; diff --git a/src/coreclr/vm/amd64/JitHelpers_FastMP.asm b/src/coreclr/vm/amd64/JitHelpers_FastMP.asm deleted file mode 100644 index 9849b8d8016d70..00000000000000 --- a/src/coreclr/vm/amd64/JitHelpers_FastMP.asm +++ /dev/null @@ -1,75 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. - -; *********************************************************************** -; File: JitHelpers_InlineGetThread.asm, see history in jithelp.asm -; -; *********************************************************************** - -include AsmMacros.inc -include asmconstants.inc - -CopyValueClassUnchecked equ ?CopyValueClassUnchecked@@YAXPEAX0PEAVMethodTable@@@Z -JIT_Box equ ?JIT_Box@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@PEAX@Z - -extern CopyValueClassUnchecked:proc -extern JIT_Box:proc - -; HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData) -NESTED_ENTRY JIT_BoxFastMP, _TEXT - - ; m_BaseSize is guaranteed to be a multiple of 8. - mov r8d, [rcx + OFFSET__MethodTable__m_BaseSize] - - INLINE_GET_ALLOC_CONTEXT r11 - mov r10, [r11 + OFFSETOF__gc_alloc_context__alloc_limit] - mov rax, [r11 + OFFSETOF__gc_alloc_context__alloc_ptr] - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - test rdx, rdx - je NullRef - - mov [r11 + OFFSETOF__gc_alloc_context__alloc_ptr], r8 - mov [rax], rcx - - ; Check whether the object contains pointers - test dword ptr [rcx + OFFSETOF__MethodTable__m_dwFlags], MethodTable__enum_flag_ContainsPointers - jnz ContainsPointers - - ; We have no pointers - emit a simple inline copy loop - ; Copy the contents from the end - mov ecx, [rcx + OFFSET__MethodTable__m_BaseSize] - sub ecx, 18h ; sizeof(ObjHeader) + sizeof(Object) + last slot - -align 16 - CopyLoop: - mov r8, [rdx+rcx] - mov [rax+rcx+8], r8 - sub ecx, 8 - jge CopyLoop - REPRET - - ContainsPointers: - ; Do call to CopyValueClassUnchecked(object, data, pMT) - push_vol_reg rax - alloc_stack 20h - END_PROLOGUE - - mov r8, rcx - lea rcx, [rax + 8] - call CopyValueClassUnchecked - - add rsp, 20h - pop rax - ret - - AllocFailed: - NullRef: - jmp JIT_Box -NESTED_END JIT_BoxFastMP, _TEXT - - end diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index ab18568159e072..e6302b08bc3c8b 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -1776,6 +1776,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); } diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index de33ce61be849e..d1c41a1309f6f7 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -895,6 +895,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); } diff --git a/src/coreclr/vm/comutilnative.cpp b/src/coreclr/vm/comutilnative.cpp index 1b768de4f92f53..143174cc24ce70 100644 --- a/src/coreclr/vm/comutilnative.cpp +++ b/src/coreclr/vm/comutilnative.cpp @@ -925,7 +925,7 @@ FCIMPL0(INT64, GCInterface::GetAllocatedBytesForCurrentThread) INT64 currentAllocated = 0; Thread *pThread = GetThread(); - gc_alloc_context* ac = &t_thread_alloc_context; + gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context; currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr); return currentAllocated; diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index 67d4bdf4e25521..24036ee75b3733 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -552,7 +552,7 @@ void GCCoverageInfo::SprinkleBreakpoints( { case InstructionType::Call_IndirectUnconditional: #ifdef TARGET_AMD64 - if(!(EECodeManager::InterruptibleSafePointsEnabled() && safePointDecoder.AreSafePointsInterruptible()) && + if(!(EECodeManager::InterruptibleSafePointsEnabled() && safePointDecoder.AreSafePointsInterruptible()) && safePointDecoder.IsSafePoint((UINT32)(cur + len - codeStart + regionOffsetAdj))) #endif { @@ -1349,7 +1349,7 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr, GCCoverageI #endif #ifdef TARGET_X86 - // Epilog checking relies on precise control of when instrumentation for the first prolog + // Epilog checking relies on precise control of when instrumentation for the first prolog // instruction is enabled or disabled. In particular, if a function has multiple epilogs, or // the first execution of the function terminates via an exception, and subsequent completions // do not, then the function may trigger a false stress fault if epilog checks are not disabled. @@ -1859,7 +1859,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) // BUG(github #10318) - when not using allocation contexts, the alloc lock // must be acquired here. Until fixed, this assert prevents random heap corruption. assert(GCHeapUtilities::UseThreadAllocationContexts()); - GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context); + GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context); // StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update // We can not rely on the return code to determine if the instruction update happened diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 3233eb2add6691..40f619428389df 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -443,7 +443,7 @@ gc_alloc_context * GCToEEInterface::GetAllocContext() return nullptr; } - return &t_thread_alloc_context; + return &t_runtime_thread_locals.alloc_context; } void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param) diff --git a/src/coreclr/vm/gcheaputilities.cpp b/src/coreclr/vm/gcheaputilities.cpp index a365300be4f61a..cd0259eef45d83 100644 --- a/src/coreclr/vm/gcheaputilities.cpp +++ b/src/coreclr/vm/gcheaputilities.cpp @@ -43,12 +43,6 @@ bool g_sw_ww_enabled_for_gc_heap = false; GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {}); -// on MP systems, each thread has its own allocation chunk so we can avoid -// lock prefixes and expensive MP cache snooping stuff -#ifndef _MSC_VER -__thread gc_alloc_context t_thread_alloc_context; -#endif - enum GC_LOAD_STATUS { GC_LOAD_STATUS_BEFORE_START, GC_LOAD_STATUS_START, @@ -182,9 +176,9 @@ HMODULE LoadStandaloneGc(LPCWSTR libFileName, LPCWSTR libFilePath) // The libFileName originates either from an environment variable or from the runtimeconfig.json // These are trusted locations, and therefore even if it is a relative path, there is no security risk. // - // However, users often don't know the absolute path to their coreclr module, especially on production. + // However, users often don't know the absolute path to their coreclr module, especially on production. // Therefore we allow referencing it from an arbitrary location through libFilePath instead. Users, however - // are warned that they should keep the file in a secure location such that it cannot be tampered. + // are warned that they should keep the file in a secure location such that it cannot be tampered. // if (!ValidateModuleName(libFileName)) { diff --git a/src/coreclr/vm/gcheaputilities.h b/src/coreclr/vm/gcheaputilities.h index c20c574d470619..c652cc52bf417c 100644 --- a/src/coreclr/vm/gcheaputilities.h +++ b/src/coreclr/vm/gcheaputilities.h @@ -26,14 +26,6 @@ GVAL_DECL(gc_alloc_context, g_global_alloc_context); } #endif // !DACCESS_COMPILE -// on MP systems, each thread has its own allocation chunk so we can avoid -// lock prefixes and expensive MP cache snooping stuff -#ifdef _MSC_VER -EXTERN_C __declspec(selectany) __declspec(thread) gc_alloc_context t_thread_alloc_context; -#else -EXTERN_C __thread gc_alloc_context t_thread_alloc_context; -#endif - extern "C" uint32_t* g_card_bundle_table; extern "C" uint8_t* g_ephemeral_low; extern "C" uint8_t* g_ephemeral_high; diff --git a/src/coreclr/vm/gchelpers.cpp b/src/coreclr/vm/gchelpers.cpp index 79bbf57510a612..06db3076ef4ebf 100644 --- a/src/coreclr/vm/gchelpers.cpp +++ b/src/coreclr/vm/gchelpers.cpp @@ -46,7 +46,7 @@ inline gc_alloc_context* GetThreadAllocContext() assert(GCHeapUtilities::UseThreadAllocationContexts()); - return &t_thread_alloc_context; + return &t_runtime_thread_locals.alloc_context; } // When not using per-thread allocation contexts, we (the EE) need to take care that @@ -1484,4 +1484,4 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref) } } } -} \ No newline at end of file +} diff --git a/src/coreclr/vm/gcstress.h b/src/coreclr/vm/gcstress.h index 3cd7894d9b3efb..8fe0b962007fe9 100644 --- a/src/coreclr/vm/gcstress.h +++ b/src/coreclr/vm/gcstress.h @@ -289,7 +289,7 @@ namespace _GCStress // BUG(github #10318) - when not using allocation contexts, the alloc lock // must be acquired here. Until fixed, this assert prevents random heap corruption. _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context); + GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context); } FORCEINLINE diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index f5a694a047ff68..fd1346b8f339fc 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -2434,7 +2434,7 @@ namespace { gc_alloc_context* STDCALL GetAllocContextHelper() { - return &t_thread_alloc_context; + return &t_runtime_thread_locals.alloc_context; } } #endif @@ -2490,8 +2490,8 @@ VOID StubLinkerCPU::X86EmitCurrentThreadAllocContextFetch(X86Reg dstreg, unsigne X86EmitIndexRegLoad(dstreg, dstreg, sizeof(void *) * _tls_index); - _ASSERTE(Thread::GetOffsetOfThreadStatic(&t_thread_alloc_context) < INT_MAX); - X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_thread_alloc_context)); + _ASSERTE(Thread::GetOffsetOfThreadStatic(&t_runtime_thread_locals.alloc_context) < INT_MAX); + X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_runtime_thread_locals.alloc_context)); #endif // TARGET_UNIX } diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index efdd2de796232b..4d6127241bddbc 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1668,7 +1668,7 @@ HCIMPL1_RAW(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_) } CONTRACTL_END; _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - gc_alloc_context *allocContext = &t_thread_alloc_context; + gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; TypeHandle typeHandle(typeHnd_); _ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables @@ -1785,7 +1785,7 @@ HCIMPL1_RAW(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength) return HCCALL1(FramedAllocateString, stringLength); } - gc_alloc_context *allocContext = &t_thread_alloc_context; + gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; SIZE_T totalSize = StringObject::GetSize(stringLength); @@ -1901,7 +1901,7 @@ HCIMPL2_RAW(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT return HCCALL2(JIT_NewArr1, arrayMT, size); } - gc_alloc_context *allocContext = &t_thread_alloc_context; + gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; MethodTable *pArrayMT = (MethodTable *)arrayMT; @@ -1959,11 +1959,6 @@ HCIMPL2_RAW(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayM return HCCALL2(JIT_NewArr1, arrayMT, size); } - // This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler - // to use volatile registers for intermediate values. This reduces the number of push/pop instructions and eliminates - // some reshuffling of intermediate values into nonvolatile registers around the call. - Thread *thread = GetThread(); - SIZE_T totalSize = componentCount * sizeof(void *); _ASSERTE(totalSize / sizeof(void *) == componentCount); @@ -1975,7 +1970,7 @@ HCIMPL2_RAW(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayM _ASSERTE(ALIGN_UP(totalSize, DATA_ALIGNMENT) == totalSize); - gc_alloc_context *allocContext = &t_thread_alloc_context; + gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; BYTE *allocPtr = allocContext->alloc_ptr; _ASSERTE(allocPtr <= allocContext->alloc_limit); if (totalSize > static_cast(allocContext->alloc_limit - allocPtr)) @@ -2109,14 +2104,67 @@ HCIMPLEND // VALUETYPE/BYREF HELPERS // //======================================================================== +/*************************************************************/ +HCIMPL2_RAW(Object*, JIT_Box_MP_FastPortable, CORINFO_CLASS_HANDLE type, void* unboxedData) +{ + CONTRACTL { + THROWS; + DISABLED(GC_TRIGGERS); + MODE_COOPERATIVE; + } CONTRACTL_END; + + if (unboxedData == nullptr) + { + // Tail call to the slow helper + return HCCALL2(JIT_Box, type, unboxedData); + } + + _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); + gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; + + TypeHandle typeHandle(type); + _ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables + MethodTable *methodTable = typeHandle.AsMethodTable(); + // The fast helper should never be called for nullable types. + _ASSERTE(!methodTable->IsNullable()); + +#ifdef FEATURE_64BIT_ALIGNMENT + if (methodTable->RequiresAlign8()) + { + return HCCALL2(JIT_Box, type, unboxedData); + } +#endif + + SIZE_T size = methodTable->GetBaseSize(); + _ASSERTE(size % DATA_ALIGNMENT == 0); + + BYTE *allocPtr = allocContext->alloc_ptr; + _ASSERTE(allocPtr <= allocContext->alloc_limit); + if (size > static_cast(allocContext->alloc_limit - allocPtr)) + { + // Tail call to the slow helper + return HCCALL2(JIT_Box, type, unboxedData); + } + + allocContext->alloc_ptr = allocPtr + size; + + _ASSERTE(allocPtr != nullptr); + Object *object = reinterpret_cast(allocPtr); + _ASSERTE(object->HasEmptySyncBlockInfo()); + object->SetMethodTable(methodTable); + + // Copy the data into the object + CopyValueClass(object->UnBox(), unboxedData, methodTable); + + return object; +} +HCIMPLEND_RAW /*************************************************************/ HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData) { FCALL_CONTRACT; - // TODO: if we care, we could do a fast trial allocation - // and avoid the building the frame most times OBJECTREF newobj = NULL; HELPER_METHOD_FRAME_BEGIN_RET_NOPOLL(); // Set up a frame GCPROTECT_BEGININTERIOR(unboxedData); diff --git a/src/coreclr/vm/jitinterfacegen.cpp b/src/coreclr/vm/jitinterfacegen.cpp index 6cb3e4dead267f..1d55e308402fe3 100644 --- a/src/coreclr/vm/jitinterfacegen.cpp +++ b/src/coreclr/vm/jitinterfacegen.cpp @@ -65,6 +65,7 @@ void InitJITHelpers1() #ifdef TARGET_UNIX SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); @@ -75,7 +76,7 @@ void InitJITHelpers1() { SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp index 4de3341abdcc16..a4006f99e94f7a 100644 --- a/src/coreclr/vm/loongarch64/stubs.cpp +++ b/src/coreclr/vm/loongarch64/stubs.cpp @@ -930,6 +930,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); } diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp index 078fb9950073ef..32d4dc088c4394 100644 --- a/src/coreclr/vm/riscv64/stubs.cpp +++ b/src/coreclr/vm/riscv64/stubs.cpp @@ -827,6 +827,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); } diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index bda0d9ad8221ca..cad2c8bc3d5787 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -58,6 +58,10 @@ TailCallTls::TailCallTls() { } +#ifndef _MSC_VER +thread_local RuntimeThreadLocals t_runtime_thread_locals; +#endif + Thread* STDCALL GetThreadHelper() { return GetThreadNULLOk(); @@ -354,7 +358,7 @@ void SetThread(Thread* t) { InitializeCurrentThreadsStaticData(t); EnsureTlsDestructionMonitor(); - t->InitAllocContext(); + t->InitRuntimeThreadLocals(); } // Clear or set the app domain to the one domain based on if the thread is being nulled out or set @@ -964,12 +968,12 @@ HRESULT Thread::DetachThread(BOOL fDLLThreadDetach) GCX_COOP(); // GetTotalAllocatedBytes reads dead_threads_non_alloc_bytes, but will suspend EE, being in COOP mode we cannot race with that // however, there could be other threads terminating and doing the same Add. - InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_thread_alloc_context.alloc_limit - t_thread_alloc_context.alloc_ptr); - GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_thread_alloc_context, NULL, NULL); - t_thread_alloc_context.init(); // re-initialize the context. + InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_runtime_thread_locals.alloc_context.alloc_limit - t_runtime_thread_locals.alloc_context.alloc_ptr); + GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_runtime_thread_locals.alloc_context, NULL, NULL); + t_runtime_thread_locals.alloc_context.init(); // re-initialize the context. // Clear out the alloc context pointer for this thread. When TLS is gone, this pointer will point into freed memory. - m_alloc_context = nullptr; + m_pRuntimeThreadLocals = nullptr; } // We need to make sure that TLS are touched last here. @@ -1380,7 +1384,7 @@ Thread::Thread() m_pBlockingLock = NULL; - m_alloc_context = nullptr; + m_pRuntimeThreadLocals = nullptr; m_thAllocContextObj = 0; m_UserInterrupt = 0; diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index ba5e2265766c82..c1ee8e7585bbff 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -448,6 +448,23 @@ class TailCallTls const PortableTailCallFrame* GetFrame() { return m_frame; } }; +// This struct contains data that lives as long as the current OS thread. +struct RuntimeThreadLocals +{ + // on MP systems, each thread has its own allocation chunk so we can avoid + // lock prefixes and expensive MP cache snooping stuff + gc_alloc_context alloc_context; +}; + +#ifdef _MSC_VER +// use selectany to avoid initialization de-optimization issues in the compiler +__declspec(selectany) +#else +extern +#endif +thread_local RuntimeThreadLocals t_runtime_thread_locals; + +typedef DPTR(struct RuntimeThreadLocals) PTR_RuntimeThreadLocals; typedef DPTR(struct gc_alloc_context) PTR_gc_alloc_context; // #ThreadClass @@ -947,14 +964,14 @@ class Thread // Lock thread is trying to acquire VolatilePtr m_pBlockingLock; - // We store a pointer to this thread's alloc context here for easier introspection + // We store a pointer to the runtime thread locals here for easier introspection // from other threads and diagnostic tools - PTR_gc_alloc_context m_alloc_context; + PTR_RuntimeThreadLocals m_pRuntimeThreadLocals; public: - inline void InitAllocContext() { LIMITED_METHOD_CONTRACT; m_alloc_context = PTR_gc_alloc_context(&t_thread_alloc_context); } + inline void InitRuntimeThreadLocals() { LIMITED_METHOD_CONTRACT; m_pRuntimeThreadLocals = PTR_RuntimeThreadLocals(&t_runtime_thread_locals); } - inline PTR_gc_alloc_context GetAllocContext() { LIMITED_METHOD_CONTRACT; return m_alloc_context; } + inline PTR_gc_alloc_context GetAllocContext() { LIMITED_METHOD_CONTRACT; return PTR_gc_alloc_context(&m_pRuntimeThreadLocals->alloc_context); } // This is the type handle of the first object in the alloc context at the time // we fire the AllocationTick event. It's only for tooling purpose. @@ -3973,7 +3990,7 @@ struct cdac_offsets static constexpr size_t OSId = offsetof(Thread, m_OSThreadId); static constexpr size_t State = offsetof(Thread, m_State); static constexpr size_t PreemptiveGCDisabled = offsetof(Thread, m_fPreemptiveGCDisabled); - static constexpr size_t AllocContext = offsetof(Thread, m_alloc_context); + static constexpr size_t RuntimeThreadLocals = offsetof(Thread, m_pRuntimeThreadLocals); static constexpr size_t Frame = offsetof(Thread, m_pFrame); static constexpr size_t ExposedObject = offsetof(Thread, m_ExposedObject); static constexpr size_t LastThrownObject = offsetof(Thread, m_LastThrownObjectHandle); diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 1e24c1c8989292..91e8a731b53b18 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2360,7 +2360,7 @@ void Thread::PerformPreemptiveGC() // BUG(github #10318) - when not using allocation contexts, the alloc lock // must be acquired here. Until fixed, this assert prevents random heap corruption. _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context); + GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context); m_bGCStressing = FALSE; } m_GCOnTransitionsOK = TRUE; diff --git a/src/native/managed/cdacreader/src/Contracts/Thread.cs b/src/native/managed/cdacreader/src/Contracts/Thread.cs index fa399ebc454988..399c62f2923cc5 100644 --- a/src/native/managed/cdacreader/src/Contracts/Thread.cs +++ b/src/native/managed/cdacreader/src/Contracts/Thread.cs @@ -122,8 +122,8 @@ ThreadData IThread.GetThreadData(TargetPointer threadPointer) thread.OSId, (ThreadState)thread.State, (thread.PreemptiveGCDisabled & 0x1) != 0, - thread.AllocContext is null ? TargetPointer.Null : thread.AllocContext.Pointer, - thread.AllocContext is null ? TargetPointer.Null : thread.AllocContext.Limit, + thread.RuntimeThreadLocals?.AllocContext.Pointer ?? TargetPointer.Null, + thread.RuntimeThreadLocals?.AllocContext.Limit ?? TargetPointer.Null, thread.Frame, firstNestedException, thread.TEB, diff --git a/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs b/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs new file mode 100644 index 00000000000000..2d7f92cb4cb247 --- /dev/null +++ b/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace Microsoft.Diagnostics.DataContractReader.Data; + +internal sealed class RuntimeThreadLocals : IData +{ + static RuntimeThreadLocals IData.Create(Target target, TargetPointer address) + => new RuntimeThreadLocals(target, address); + + public RuntimeThreadLocals(Target target, TargetPointer address) + { + Target.TypeInfo type = target.GetTypeInfo(DataType.RuntimeThreadLocals); + AllocContext = target.ProcessedData.GetOrAdd(address + (ulong)type.Fields[nameof(AllocContext)].Offset); + } + + public GCAllocContext AllocContext { get; init; } +} diff --git a/src/native/managed/cdacreader/src/Data/Thread.cs b/src/native/managed/cdacreader/src/Data/Thread.cs index 8995d0ede2179c..ac9d1f4029d0e2 100644 --- a/src/native/managed/cdacreader/src/Data/Thread.cs +++ b/src/native/managed/cdacreader/src/Data/Thread.cs @@ -17,9 +17,9 @@ public Thread(Target target, TargetPointer address) State = target.Read(address + (ulong)type.Fields[nameof(State)].Offset); PreemptiveGCDisabled = target.Read(address + (ulong)type.Fields[nameof(PreemptiveGCDisabled)].Offset); - TargetPointer allocContextPointer = target.ReadPointer(address + (ulong)type.Fields[nameof(AllocContext)].Offset); - if (allocContextPointer != TargetPointer.Null) - AllocContext = target.ProcessedData.GetOrAdd(allocContextPointer); + TargetPointer runtimeThreadLocalsPointer = target.ReadPointer(address + (ulong)type.Fields[nameof(RuntimeThreadLocals)].Offset); + if (runtimeThreadLocalsPointer != TargetPointer.Null) + RuntimeThreadLocals = target.ProcessedData.GetOrAdd(runtimeThreadLocalsPointer); Frame = target.ReadPointer(address + (ulong)type.Fields[nameof(Frame)].Offset); @@ -38,7 +38,7 @@ public Thread(Target target, TargetPointer address) public TargetNUInt OSId { get; init; } public uint State { get; init; } public uint PreemptiveGCDisabled { get; init; } - public GCAllocContext? AllocContext { get; init; } + public RuntimeThreadLocals? RuntimeThreadLocals { get; init; } public TargetPointer Frame { get; init; } public TargetPointer TEB { get; init; } public TargetPointer LastThrownObject { get; init; } diff --git a/src/native/managed/cdacreader/src/DataType.cs b/src/native/managed/cdacreader/src/DataType.cs index 68e41240e436b2..94274d5f47a8ba 100644 --- a/src/native/managed/cdacreader/src/DataType.cs +++ b/src/native/managed/cdacreader/src/DataType.cs @@ -24,4 +24,5 @@ public enum DataType ThreadStore, GCAllocContext, ExceptionInfo, + RuntimeThreadLocals }