diff --git a/src/System.Private.CoreLib/Resources/Strings.resx b/src/System.Private.CoreLib/Resources/Strings.resx index a1a386af9496..37b6a2f3c17c 100644 --- a/src/System.Private.CoreLib/Resources/Strings.resx +++ b/src/System.Private.CoreLib/Resources/Strings.resx @@ -1400,7 +1400,7 @@ The object has no underlying COM data associated with it. - Uninitialized Strings cannot be created. + Uninitialized strings cannot be created. The object's type must not be a Windows Runtime type. diff --git a/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/System.Private.CoreLib/System.Private.CoreLib.csproj index 0dda24d4e326..07dc99045fe2 100644 --- a/src/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -288,6 +288,7 @@ + diff --git a/src/System.Private.CoreLib/src/System/Utf8String.cs b/src/System.Private.CoreLib/src/System/Utf8String.cs new file mode 100644 index 000000000000..6ff7e0a1b0e9 --- /dev/null +++ b/src/System.Private.CoreLib/src/System/Utf8String.cs @@ -0,0 +1,86 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Text; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +namespace System +{ + // This is an experimental type and not referenced from CoreFx but needs to exist and be public so we can prototype in CoreFxLab. + public sealed class Utf8String + { + // Do not reorder these fields. Must match layout of Utf8StringObject in object.h. + private readonly int _length; + private readonly byte _firstByte; + + public int Length => _length; + public ref readonly byte GetPinnableReference() => ref _firstByte; + + public static readonly Utf8String Empty = FastAllocate(0); + + // Utf8String constructors + // These are special. 
The implementation methods for these have a different signature from the + // declared constructors. + + [MethodImpl(MethodImplOptions.InternalCall)] + public extern Utf8String(ReadOnlySpan<byte> value); + +#if PROJECTN + [DependencyReductionRoot] +#endif +#if !CORECLR + static +#endif + private Utf8String Ctor(ReadOnlySpan<byte> value) + { + if (value.Length == 0) + return Empty; + + Utf8String newString = FastAllocate(value.Length); + unsafe + { + fixed (byte* pDst = &newString._firstByte) + fixed (byte* pSrc = &MemoryMarshal.GetNonNullPinnableReference(value)) + { + Buffer.Memcpy(dest: pDst, src: pSrc, len: value.Length); + } + } + return newString; + } + + [MethodImpl(MethodImplOptions.InternalCall)] + public extern Utf8String(ReadOnlySpan<char> value); + +#if PROJECTN + [DependencyReductionRoot] +#endif +#if !CORECLR + static +#endif + private Utf8String Ctor(ReadOnlySpan<char> value) + { + if (value.Length == 0) + return Empty; + + Encoding e = Encoding.UTF8; + int length = e.GetByteCount(value); + Utf8String newString = FastAllocate(length); + unsafe + { + fixed (byte* pFirstByte = &newString._firstByte) + fixed (char* pFirstChar = &MemoryMarshal.GetNonNullPinnableReference(value)) + { + e.GetBytes(pFirstChar, value.Length, pFirstByte, length); // charCount is the UTF-16 length of the input; 'length' is the UTF-8 byte count of the destination. + } + } + return newString; + } + + // Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes (the extra + // +1 is for the NUL terminator.) + [MethodImpl(MethodImplOptions.InternalCall)] + public static extern Utf8String FastAllocate(int length); //TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form. 
+ } +} diff --git a/src/inc/dacvars.h b/src/inc/dacvars.h index a4209f12ac65..f23d6111ef3d 100644 --- a/src/inc/dacvars.h +++ b/src/inc/dacvars.h @@ -172,6 +172,7 @@ DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pObjectClass, ::g_pObjectClass DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pRuntimeTypeClass, ::g_pRuntimeTypeClass) DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pCanonMethodTableClass, ::g_pCanonMethodTableClass) DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pStringClass, ::g_pStringClass) +DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pUtf8StringClass, ::g_pUtf8StringClass) DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pArrayClass, ::g_pArrayClass) DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pSZArrayHelperClass, ::g_pSZArrayHelperClass) DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pNullableClass, ::g_pNullableClass) diff --git a/src/vm/appdomain.cpp b/src/vm/appdomain.cpp index 3ca7ec7e24a2..4d8c5ff64c12 100644 --- a/src/vm/appdomain.cpp +++ b/src/vm/appdomain.cpp @@ -2760,6 +2760,9 @@ void SystemDomain::LoadBaseSystemClasses() // Load String g_pStringClass = MscorlibBinder::LoadPrimitiveType(ELEMENT_TYPE_STRING); + // Load Utf8String + g_pUtf8StringClass = MscorlibBinder::GetClass(CLASS__UTF8_STRING); + // Used by Buffer::BlockCopy g_pByteArrayMT = ClassLoader::LoadArrayTypeThrowing( TypeHandle(MscorlibBinder::GetElementType(ELEMENT_TYPE_U1))).AsArray()->GetMethodTable(); diff --git a/src/vm/classnames.h b/src/vm/classnames.h index fec305232634..fea50f7f24ca 100644 --- a/src/vm/classnames.h +++ b/src/vm/classnames.h @@ -147,6 +147,8 @@ #define g_TransparentProxyName "__TransparentProxy" #define g_TypeClassName "System.Type" +#define g_Utf8StringName "Utf8String" + #define g_VariantClassName "System.Variant" #define g_GuidClassName "System.Guid" diff --git a/src/vm/ecall.cpp b/src/vm/ecall.cpp index 3812ff10305e..0a59f51edde3 100644 --- a/src/vm/ecall.cpp +++ b/src/vm/ecall.cpp @@ -55,6 +55,31 @@ 
static_assert_no_msg(ECallCtor_First + 8 == ECall::CtorSBytePtrStartLengthEncodi #define NumberOfStringConstructors 9 +#define METHOD__UTF8STRING__CTORF_FIRST METHOD__UTF8_STRING__CTORF_READONLYSPANOFBYTE +static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 0 == METHOD__UTF8_STRING__CTORF_READONLYSPANOFBYTE); +static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 1 == METHOD__UTF8_STRING__CTORF_READONLYSPANOFCHAR); + +#define ECallUtf8String_Ctor_First ECall::Utf8StringCtorReadOnlySpanOfByteManaged +static_assert_no_msg(ECallUtf8String_Ctor_First + 0 == ECall::Utf8StringCtorReadOnlySpanOfByteManaged); +static_assert_no_msg(ECallUtf8String_Ctor_First + 1 == ECall::Utf8StringCtorReadOnlySpanOfCharManaged); + +#define NumberOfUtf8StringConstructors 2 + +static void PopulateConstructors(DWORD firstECallIndex, BinderMethodID firstMethod, int numConstructors) +{ + STANDARD_VM_CONTRACT; + + for (int i = 0; i < numConstructors; i++) + { + MethodDesc* pMD = MscorlibBinder::GetMethod((BinderMethodID)(firstMethod + i)); + _ASSERTE(pMD != NULL); + + PCODE pDest = pMD->GetMultiCallableAddrOfCode(); + + ECall::DynamicallyAssignFCallImpl(pDest, firstECallIndex + i); + } +} + void ECall::PopulateManagedStringConstructors() { STANDARD_VM_CONTRACT; @@ -62,16 +87,11 @@ void ECall::PopulateManagedStringConstructors() INDEBUG(static bool fInitialized = false); _ASSERTE(!fInitialized); // assume this method is only called once _ASSERTE(g_pStringClass != NULL); + _ASSERTE(g_pUtf8StringClass != NULL); - for (int i = 0; i < NumberOfStringConstructors; i++) - { - MethodDesc* pMD = MscorlibBinder::GetMethod((BinderMethodID)(METHOD__STRING__CTORF_FIRST + i)); - _ASSERTE(pMD != NULL); - - PCODE pDest = pMD->GetMultiCallableAddrOfCode(); + PopulateConstructors(ECallCtor_First, METHOD__STRING__CTORF_FIRST, NumberOfStringConstructors); + PopulateConstructors(ECallUtf8String_Ctor_First, METHOD__UTF8STRING__CTORF_FIRST, NumberOfUtf8StringConstructors); - ECall::DynamicallyAssignFCallImpl(pDest, 
ECallCtor_First + i); - } INDEBUG(fInitialized = true); } diff --git a/src/vm/ecall.h b/src/vm/ecall.h index c809109c4ce4..aa1fd827004a 100644 --- a/src/vm/ecall.h +++ b/src/vm/ecall.h @@ -114,7 +114,10 @@ class ECall DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrManaged, NULL) \ DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrStartLengthManaged, NULL) \ DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrStartLengthEncodingManaged, NULL) \ + DYNAMICALLY_ASSIGNED_FCALL_IMPL(FastAllocateUtf8String, FramedAllocateUtf8String) \ DYNAMICALLY_ASSIGNED_FCALL_IMPL(InternalGetCurrentThread, NULL) \ + DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorReadOnlySpanOfByteManaged, NULL) \ + DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorReadOnlySpanOfCharManaged, NULL) \ enum { diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h index 8ee51b455b13..49543368c7b1 100644 --- a/src/vm/ecalllist.h +++ b/src/vm/ecalllist.h @@ -116,6 +116,12 @@ FCFuncStart(gStringFuncs) #endif // FEATURE_COMINTEROP FCFuncEnd() +FCFuncStart(gUtf8StringFuncs) + FCDynamic("FastAllocate", CORINFO_INTRINSIC_Illegal, ECall::FastAllocateUtf8String) + FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ReadOnlySpanOfByte_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorReadOnlySpanOfByteManaged) + FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ReadOnlySpanOfChar_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorReadOnlySpanOfCharManaged) +FCFuncEnd() + FCFuncStart(gValueTypeFuncs) FCFuncElement("CanCompareBits", ValueTypeHelper::CanCompareBits) FCFuncElement("FastEqualsCheck", ValueTypeHelper::FastEqualsCheck) @@ -1377,6 +1383,7 @@ FCClassElement("TypedReference", "System", gTypedReferenceFuncs) #ifdef FEATURE_COMINTEROP FCClassElement("UriMarshaler", "System.StubHelpers", gUriMarshalerFuncs) #endif +FCClassElement("Utf8String", "System", gUtf8StringFuncs) FCClassElement("ValueClassMarshaler", "System.StubHelpers", gValueClassMarshalerFuncs) FCClassElement("ValueType", "System", gValueTypeFuncs) #ifdef FEATURE_COMINTEROP 
diff --git a/src/vm/gchelpers.cpp b/src/vm/gchelpers.cpp index 4684edeaf571..586d12877767 100644 --- a/src/vm/gchelpers.cpp +++ b/src/vm/gchelpers.cpp @@ -1059,6 +1059,73 @@ STRINGREF SlowAllocateString( DWORD cchStringLength ) return( ObjectToSTRINGREF(orObject) ); } +Utf8StringObject *SlowAllocateUtf8String(DWORD cchStringLength) +{ + CONTRACTL{ + THROWS; + GC_TRIGGERS; + MODE_COOPERATIVE; // returns an objref without pinning it => cooperative + } CONTRACTL_END; + + Utf8StringObject *orObject = NULL; + +#ifdef _DEBUG + if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) + { + char *a = new char; + delete a; + } +#endif + + // Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer + // overflows in buffer size calculations. + if (cchStringLength > 0x7FFFFFDF) + ThrowOutOfMemory(); + + SIZE_T ObjectSize = PtrAlign(Utf8StringObject::GetSize(cchStringLength)); + _ASSERTE(ObjectSize > cchStringLength); + + SetTypeHandleOnThreadForAlloc(TypeHandle(g_pUtf8StringClass)); + + orObject = (Utf8StringObject *)Alloc(ObjectSize, FALSE, FALSE); + + // Object is zero-init already + _ASSERTE(orObject->HasEmptySyncBlockInfo()); + + // Initialize Object + orObject->SetMethodTable(g_pUtf8StringClass); + orObject->SetLength(cchStringLength); + + if (ObjectSize >= LARGE_OBJECT_SIZE) + { + GCHeapUtilities::GetGCHeap()->PublishObject((BYTE*)orObject); + } + + // Notify the profiler of the allocation + if (TrackAllocations()) + { + OBJECTREF objref = ObjectToOBJECTREF((Object*)orObject); + GCPROTECT_BEGIN(objref); + ProfilerObjectAllocatedCallback(objref, (ClassID)orObject->GetTypeHandle().AsPtr()); + GCPROTECT_END(); + + orObject = (Utf8StringObject *)OBJECTREFToObject(objref); + } + +#ifdef FEATURE_EVENT_TRACE + // Send ETW event for allocation + if (ETW::TypeSystemLog::IsHeapAllocEventEnabled()) + { + ETW::TypeSystemLog::SendObjectAllocatedEvent(orObject); + } +#endif // FEATURE_EVENT_TRACE + + LogAlloc(ObjectSize, g_pUtf8StringClass, 
orObject); + + return orObject; +} + + #ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION // OBJECTREF AllocateComClassObject(ComClassFactory* pComClsFac) void AllocateComClassObject(ComClassFactory* pComClsFac, OBJECTREF* ppRefClass) diff --git a/src/vm/gchelpers.h b/src/vm/gchelpers.h index 0e407c6e6127..8357d2b2a5fc 100644 --- a/src/vm/gchelpers.h +++ b/src/vm/gchelpers.h @@ -71,6 +71,8 @@ STRINGREF AllocateString( DWORD cchStringLength ); // The slow version, implemented in gcscan.cpp STRINGREF SlowAllocateString( DWORD cchStringLength ); +Utf8StringObject *SlowAllocateUtf8String( DWORD cchStringLength ); + #else // On other platforms, go to the (somewhat less efficient) implementations in gcscan.cpp @@ -83,6 +85,8 @@ OBJECTREF AllocateObjectArray(DWORD cElements, TypeHandle ElementType, BOOL bAll STRINGREF SlowAllocateString( DWORD cchStringLength ); +Utf8StringObject *SlowAllocateUtf8String( DWORD cchStringLength ); + inline STRINGREF AllocateString( DWORD cchStringLength ) { WRAPPER_NO_CONTRACT; diff --git a/src/vm/jithelpers.cpp b/src/vm/jithelpers.cpp index 6d31a187290f..9823f30ed05f 100644 --- a/src/vm/jithelpers.cpp +++ b/src/vm/jithelpers.cpp @@ -2881,6 +2881,60 @@ HCIMPL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength) } HCIMPLEND +HCIMPL1(Utf8StringObject*, AllocateUtf8String_MP_FastPortable, DWORD stringLength) +{ + FCALL_CONTRACT; + + do + { + _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); + + // Instead of doing elaborate overflow checks, we just limit the number of elements. This will avoid all overflow + // problems, as well as making sure big string objects are correctly allocated in the big object heap. + if (stringLength >= LARGE_OBJECT_SIZE - 256) + { + break; + } + + // This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler + // to use volatile registers for intermediate values. 
This reduces the number of push/pop instructions and eliminates + // some reshuffling of intermediate values into nonvolatile registers around the call. + Thread *thread = GetThread(); + + SIZE_T totalSize = Utf8StringObject::GetSize(stringLength); + + // The method table's base size includes space for a terminating null character + _ASSERTE(totalSize >= g_pUtf8StringClass->GetBaseSize()); + _ASSERTE(totalSize - g_pUtf8StringClass->GetBaseSize() == stringLength); + + SIZE_T alignedTotalSize = ALIGN_UP(totalSize, DATA_ALIGNMENT); + _ASSERTE(alignedTotalSize >= totalSize); + totalSize = alignedTotalSize; + + gc_alloc_context *allocContext = thread->GetAllocContext(); + BYTE *allocPtr = allocContext->alloc_ptr; + _ASSERTE(allocPtr <= allocContext->alloc_limit); + if (totalSize > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr)) + { + break; + } + allocContext->alloc_ptr = allocPtr + totalSize; + + _ASSERTE(allocPtr != nullptr); + Utf8StringObject *stringObject = reinterpret_cast<Utf8StringObject *>(allocPtr); + stringObject->SetMethodTable(g_pUtf8StringClass); + stringObject->SetLength(stringLength); + + return stringObject; + } while (false); + + // Tail call to the slow helper + ENDFORBIDGC(); + return HCCALL1(FramedAllocateUtf8String, stringLength); +} +HCIMPLEND + + #include /*********************************************************************/ @@ -2920,6 +2974,20 @@ HCIMPL1(StringObject*, FramedAllocateString, DWORD stringLength) } HCIMPLEND +HCIMPL1(Utf8StringObject*, FramedAllocateUtf8String, DWORD stringLength) +{ + FCALL_CONTRACT; + + Utf8StringObject* result = NULL; + HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame + + result = SlowAllocateUtf8String(stringLength); + + HELPER_METHOD_FRAME_END(); + return result; +} +HCIMPLEND + /*********************************************************************/ OBJECTHANDLE ConstructStringLiteral(CORINFO_MODULE_HANDLE scopeHnd, mdToken metaTok) { diff --git a/src/vm/jitinterface.h b/src/vm/jitinterface.h index 
ebe64edbb9f4..832d47f17557 100644 --- a/src/vm/jitinterface.h +++ b/src/vm/jitinterface.h @@ -223,6 +223,9 @@ extern FCDECL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength extern FCDECL1(StringObject*, UnframedAllocateString, DWORD stringLength); extern FCDECL1(StringObject*, FramedAllocateString, DWORD stringLength); +extern FCDECL1(Utf8StringObject*, AllocateUtf8String_MP_FastPortable, DWORD stringLength); +extern FCDECL1(Utf8StringObject*, FramedAllocateUtf8String, DWORD stringLength); + extern FCDECL2(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); extern FCDECL2(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); extern FCDECL2(Object*, JIT_NewArr1_R2R, CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); diff --git a/src/vm/jitinterfacegen.cpp b/src/vm/jitinterfacegen.cpp index b630e7f9989d..2d1241b09478 100644 --- a/src/vm/jitinterfacegen.cpp +++ b/src/vm/jitinterfacegen.cpp @@ -84,6 +84,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateUtf8String_MP_FastPortable), ECall::FastAllocateUtf8String); #else // FEATURE_PAL // if (multi-proc || server GC) if (GCHeapUtilities::UseThreadAllocationContexts()) @@ -95,6 +96,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_InlineGetThread); ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastMP_InlineGetThread), ECall::FastAllocateString); + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateUtf8String_MP_FastPortable), ECall::FastAllocateUtf8String); } else { @@ -109,6 +111,7 @@ void InitJITHelpers1() SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_UP); 
ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastUP), ECall::FastAllocateString); + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(FramedAllocateUtf8String), ECall::FastAllocateUtf8String); // Uniprocessor path: UseThreadAllocationContexts() is false here, which the MP fast helper asserts against, so use the framed helper. } #endif // FEATURE_PAL } diff --git a/src/vm/metasig.h b/src/vm/metasig.h index b3a4139a7c8a..b8643afbf8ea 100644 --- a/src/vm/metasig.h +++ b/src/vm/metasig.h @@ -398,6 +398,7 @@ DEFINE_METASIG(IM(Bool_Bool_RetStr, F F, s)) DEFINE_METASIG(IM(PtrChar_RetVoid, P(u), v)) DEFINE_METASIG(IM(PtrChar_Int_Int_RetVoid, P(u) i i, v)) +DEFINE_METASIG_T(IM(ReadOnlySpanOfByte_RetVoid, GI(g(READONLY_SPAN), 1, b), v)) DEFINE_METASIG_T(IM(ReadOnlySpanOfChar_RetVoid, GI(g(READONLY_SPAN), 1, u), v)) DEFINE_METASIG(IM(PtrSByt_RetVoid, P(B), v)) DEFINE_METASIG(IM(PtrSByt_Int_Int_RetVoid, P(B) i i, v)) @@ -416,6 +417,10 @@ DEFINE_METASIG(IM(PtrSByt_Int_Int_RetStr, P(B) i i, s)) DEFINE_METASIG_T(IM(PtrSByt_Int_Int_Encoding_RetStr, P(B) i i C(ENCODING), s)) DEFINE_METASIG(IM(Obj_Int_RetIntPtr, j i, I)) +DEFINE_METASIG_T(IM(ArrChar_RetUtf8Str, a(u), C(UTF8_STRING))) +DEFINE_METASIG_T(IM(ReadOnlySpanOfByte_RetUtf8Str, GI(g(READONLY_SPAN), 1, b), C(UTF8_STRING))) +DEFINE_METASIG_T(IM(ReadOnlySpanOfChar_RetUtf8Str, GI(g(READONLY_SPAN), 1, u), C(UTF8_STRING))) + DEFINE_METASIG(IM(Char_Char_RetStr, u u, s)) DEFINE_METASIG(IM(Char_Int_RetVoid, u i, v)) DEFINE_METASIG_T(IM(CultureInfo_RetVoid, C(CULTURE_INFO), v)) diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h index 98702961f35d..527e521f4a8d 100644 --- a/src/vm/methodtable.h +++ b/src/vm/methodtable.h @@ -1891,7 +1891,7 @@ class MethodTable BOOL IsString() { LIMITED_METHOD_DAC_CONTRACT; - return HasComponentSize() && !IsArray(); + return HasComponentSize() && !IsArray() && RawGetComponentSize() == 2; } BOOL HasComponentSize() const diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp index ef9c37c5d578..22beb0688dba 100644 --- a/src/vm/methodtablebuilder.cpp +++ 
b/src/vm/methodtablebuilder.cpp @@ -9736,6 +9736,17 @@ void MethodTableBuilder::CheckForSystemTypes() pMT->SetComponentSize(2); } + else if (strcmp(name, g_Utf8StringName) == 0 && strcmp(nameSpace, g_SystemNS) == 0) + { + // Utf8Strings are not "normal" objects, so we need to mess with their method table a bit + // so that the GC can figure out how big each string is... + DWORD baseSize = Utf8StringObject::GetBaseSize(); + pMT->SetBaseSize(baseSize); // NULL character included + + GetHalfBakedClass()->SetBaseSizePadding(baseSize - bmtFP->NumInstanceFieldBytes); + + pMT->SetComponentSize(1); + } else if (strcmp(name, g_CriticalFinalizerObjectName) == 0 && strcmp(nameSpace, g_ConstrainedExecutionNS) == 0) { // To introduce a class with a critical finalizer, diff --git a/src/vm/mscorlib.h b/src/vm/mscorlib.h index c57290ff1285..986c152d34db 100644 --- a/src/vm/mscorlib.h +++ b/src/vm/mscorlib.h @@ -884,6 +884,10 @@ DEFINE_METHOD(STRING, INTERNAL_COPY, InternalCopy, DEFINE_METHOD(STRING, WCSLEN, wcslen, SM_PtrChar_RetInt) DEFINE_PROPERTY(STRING, LENGTH, Length, Int) +DEFINE_CLASS(UTF8_STRING, System, Utf8String) +DEFINE_METHOD(UTF8_STRING, CTORF_READONLYSPANOFBYTE,Ctor, IM_ReadOnlySpanOfByte_RetUtf8Str) +DEFINE_METHOD(UTF8_STRING, CTORF_READONLYSPANOFCHAR,Ctor, IM_ReadOnlySpanOfChar_RetUtf8Str) + DEFINE_CLASS(STRING_BUILDER, Text, StringBuilder) DEFINE_PROPERTY(STRING_BUILDER, LENGTH, Length, Int) DEFINE_PROPERTY(STRING_BUILDER, CAPACITY, Capacity, Int) diff --git a/src/vm/object.h b/src/vm/object.h index 29a53e1ff7ea..711fd40b81c9 100644 --- a/src/vm/object.h +++ b/src/vm/object.h @@ -885,6 +885,7 @@ typedef DPTR(U8Array) PTR_U8Array; typedef DPTR(PTRArray) PTR_PTRArray; class StringObject; +class Utf8StringObject; #ifdef USE_CHECKED_OBJECTREFS typedef REF BASEARRAYREF; @@ -1216,6 +1217,28 @@ class ReflectClassBaseObject : public BaseObjectWithCachedData }; +class Utf8StringObject : public Object +{ +#ifdef DACCESS_COMPILE + friend class ClrDataAccess; +#endif + 
+private: + DWORD m_StringLength; + BYTE m_FirstChar; + +public: + VOID SetLength(DWORD len) { LIMITED_METHOD_CONTRACT; _ASSERTE(len >= 0); m_StringLength = len; } + +protected: + Utf8StringObject() { LIMITED_METHOD_CONTRACT; } + ~Utf8StringObject() { LIMITED_METHOD_CONTRACT; } + +public: + static DWORD GetBaseSize(); + static SIZE_T GetSize(DWORD stringLength); +}; + // This is the Method version of the Reflection object. // A Method has adddition information. // m_pMD - A pointer to the actual MethodDesc of the method. diff --git a/src/vm/object.inl b/src/vm/object.inl index dd167d806c22..91247b60925e 100644 --- a/src/vm/object.inl +++ b/src/vm/object.inl @@ -72,6 +72,20 @@ __forceinline /*static*/ SIZE_T StringObject::GetSize(DWORD strLen) return GetBaseSize() + strLen * sizeof(WCHAR); } +__forceinline /*static*/ DWORD Utf8StringObject::GetBaseSize() +{ + LIMITED_METHOD_DAC_CONTRACT; + + return OBJECT_BASESIZE + sizeof(DWORD) /* length */ + sizeof(BYTE) /* null terminator */; +} + +__forceinline /*static*/ SIZE_T Utf8StringObject::GetSize(DWORD strLen) +{ + LIMITED_METHOD_DAC_CONTRACT; + + return GetBaseSize() + strLen; +} + #ifdef DACCESS_COMPILE inline void Object::EnumMemoryRegions(void) diff --git a/src/vm/reflectioninvocation.cpp b/src/vm/reflectioninvocation.cpp index 742a26e7a4f6..b9aaa428249d 100644 --- a/src/vm/reflectioninvocation.cpp +++ b/src/vm/reflectioninvocation.cpp @@ -1007,6 +1007,7 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod, // Skip the activation optimization for remoting because of remoting proxy is not always activated. // It would be nice to clean this up and get remoting to always activate methodtable behind the proxy. 
BOOL fForceActivationForRemoting = FALSE; + BOOL fCtorOfVariableSizedObject = FALSE; if (fConstructor) { @@ -1024,7 +1025,8 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod, MethodTable * pMT = ownerType.AsMethodTable(); { - if (pMT != g_pStringClass) + fCtorOfVariableSizedObject = pMT->HasComponentSize(); + if (!fCtorOfVariableSizedObject) gc.retVal = pMT->Allocate(); } } @@ -1331,7 +1333,7 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod, if (fConstructor) { // We have a special case for Strings...The object is returned... - if (ownerType == TypeHandle(g_pStringClass)) { + if (fCtorOfVariableSizedObject) { PVOID pReturnValue = &callDescrData.returnValue; gc.retVal = *(OBJECTREF *)pReturnValue; } @@ -2680,7 +2682,7 @@ FCIMPL1(Object*, ReflectionSerialization::GetUninitializedObject, ReflectClassBa PREFIX_ASSUME(pMT != NULL); //We don't allow unitialized strings. - if (pMT == g_pStringClass) { + if (pMT->HasComponentSize()) { COMPlusThrow(kArgumentException, W("Argument_NoUninitializedStrings")); } diff --git a/src/vm/vars.cpp b/src/vm/vars.cpp index 04437f9964c8..88d6ba93bd7b 100644 --- a/src/vm/vars.cpp +++ b/src/vm/vars.cpp @@ -61,6 +61,7 @@ GPTR_IMPL(MethodTable, g_pObjectClass); GPTR_IMPL(MethodTable, g_pRuntimeTypeClass); GPTR_IMPL(MethodTable, g_pCanonMethodTableClass); // System.__Canon GPTR_IMPL(MethodTable, g_pStringClass); +GPTR_IMPL(MethodTable, g_pUtf8StringClass); GPTR_IMPL(MethodTable, g_pArrayClass); GPTR_IMPL(MethodTable, g_pSZArrayHelperClass); GPTR_IMPL(MethodTable, g_pNullableClass); diff --git a/src/vm/vars.hpp b/src/vm/vars.hpp index b6da9c5a71f0..9d53d8772991 100644 --- a/src/vm/vars.hpp +++ b/src/vm/vars.hpp @@ -364,6 +364,7 @@ GPTR_DECL(MethodTable, g_pObjectClass); GPTR_DECL(MethodTable, g_pRuntimeTypeClass); GPTR_DECL(MethodTable, g_pCanonMethodTableClass); // System.__Canon GPTR_DECL(MethodTable, g_pStringClass); +GPTR_DECL(MethodTable, g_pUtf8StringClass); GPTR_DECL(MethodTable, g_pArrayClass); GPTR_DECL(MethodTable, 
g_pSZArrayHelperClass); GPTR_DECL(MethodTable, g_pNullableClass);