-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Expose a Utf8String type. #17872
Expose a Utf8String type. #17872
Changes from all commits
6b90660
fabbe5d
f8639fa
bcbbbb9
e4f9a43
b5f7f59
9e13d11
624897f
78e15a2
5e800cd
10db0e3
3618be1
6cbb3f8
2cff568
7758492
663fe01
4f78043
e8e82db
a00b751
bbdcae4
a382e1d
56ea91c
4279139
a3ce65a
24a8071
e16f421
5633b36
0711ac1
1119a96
3c85d80
5907d1c
049f0ed
603193a
f6d811e
6825173
b9b8040
103fce3
f997aaa
d026198
493c27f
5eb53cc
6cf128f
dd11e73
82ec9cb
4a01601
4ff38a1
fab65e9
d6258a4
21f47d3
4d60318
39a4e2e
279a4c5
b247f32
cfe461b
2f4ad81
11af9eb
00e7d26
5e9651f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Text; | ||
using System.Runtime.InteropServices; | ||
using System.Runtime.CompilerServices; | ||
|
||
namespace System
{
    /// <summary>
    /// A sealed reference type whose payload bytes are stored inline in the object, starting at
    /// <c>_firstByte</c> and preceded by their length.
    /// </summary>
    /// <remarks>
    /// This is an experimental type and is not referenced from CoreFx, but it needs to exist and be public so
    /// that it can be prototyped in CoreFxLab.
    /// </remarks>
    public sealed class Utf8String
    {
        // Do not reorder these fields. Must match layout of Utf8StringObject in object.h.
        private readonly int _length;
        private readonly byte _firstByte;

        /// <summary>Gets the length of this instance in bytes (the trailing NUL terminator is not counted).</summary>
        public int Length => _length;

        /// <summary>Returns a read-only reference to the first payload byte so the instance can be pinned.</summary>
        public ref readonly byte GetPinnableReference() => ref _firstByte;

        /// <summary>The zero-length instance that the constructor implementations return for empty input.</summary>
        public static readonly Utf8String Empty = FastAllocate(0);

        // Utf8String constructors
        // These are special. The implementation methods for these have a different signature from the
        // declared constructors.

        /// <summary>Creates an instance whose payload is a copy of <paramref name="value"/>.</summary>
        /// <param name="value">The bytes to copy into the new instance.</param>
        [MethodImpl(MethodImplOptions.InternalCall)]
        public extern Utf8String(ReadOnlySpan<byte> value);

        // Implementation of the ReadOnlySpan<byte> constructor: returns Empty for empty input, otherwise
        // allocates an instance of value.Length bytes and copies the input into it unchanged.
#if PROJECTN
        [DependencyReductionRoot]
#endif
#if !CORECLR
        static
#endif
        private Utf8String Ctor(ReadOnlySpan<byte> value)
        {
            if (value.Length == 0)
                return Empty;

            Utf8String newString = FastAllocate(value.Length);
            unsafe
            {
                fixed (byte* pDst = &newString._firstByte)
                fixed (byte* pSrc = &MemoryMarshal.GetNonNullPinnableReference(value))
                {
                    Buffer.Memcpy(dest: pDst, src: pSrc, len: value.Length);
                }
            }
            return newString;
        }

        /// <summary>Creates an instance holding the UTF-8 encoding of <paramref name="value"/>.</summary>
        /// <param name="value">The UTF-16 text to encode.</param>
        [MethodImpl(MethodImplOptions.InternalCall)]
        public extern Utf8String(ReadOnlySpan<char> value);

        // Implementation of the ReadOnlySpan<char> constructor: returns Empty for empty input, otherwise
        // allocates an instance sized by Encoding.UTF8.GetByteCount and encodes the input into it.
#if PROJECTN
        [DependencyReductionRoot]
#endif
#if !CORECLR
        static
#endif
        private Utf8String Ctor(ReadOnlySpan<char> value)
        {
            if (value.Length == 0)
                return Empty;

            Encoding e = Encoding.UTF8;
            int byteCount = e.GetByteCount(value);
            Utf8String newString = FastAllocate(byteCount);
            unsafe
            {
                fixed (byte* pFirstByte = &newString._firstByte)
                fixed (char* pFirstChar = &MemoryMarshal.GetNonNullPinnableReference(value))
                {
                    // The second argument is the number of source chars, not the number of encoded bytes.
                    // The two differ whenever the input contains non-ASCII characters, and passing the
                    // (larger) byte count would make the encoder read past the end of the source span.
                    e.GetBytes(pFirstChar, value.Length, pFirstByte, byteCount);
                }
            }
            return newString;
        }

        // Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes (the extra
        // +1 is for the NUL terminator.)
        [MethodImpl(MethodImplOptions.InternalCall)]
        public static extern Utf8String FastAllocate(int length); //TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form.
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1059,6 +1059,73 @@ STRINGREF SlowAllocateString( DWORD cchStringLength ) | |
return( ObjectToSTRINGREF(orObject) ); | ||
} | ||
|
||
// Allocates and initializes a Utf8StringObject for a string of cchStringLength, then reports the | ||
// allocation to the profiler, to ETW and to the allocation log before returning it. | ||
// | ||
// - Calls ThrowOutOfMemory when cchStringLength is greater than 0x7FFFFFDF. | ||
// - The allocation size is Utf8StringObject::GetSize(cchStringLength) rounded up by PtrAlign. | ||
// - Returns a raw, unpinned object pointer; the contract requires cooperative mode. | ||
Utf8StringObject *SlowAllocateUtf8String(DWORD cchStringLength) | ||
{ | ||
CONTRACTL{ | ||
THROWS; | ||
GC_TRIGGERS; | ||
MODE_COOPERATIVE; // returns an objref without pinning it => cooperative | ||
} CONTRACTL_END; | ||
|
||
Utf8StringObject *orObject = NULL; | ||
|
||
// NOTE(review): debug-only fault injection hook; presumably the throwaway new/delete exists so that | ||
// an injected allocation failure surfaces here -- TODO confirm. | ||
#ifdef _DEBUG | ||
if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) | ||
{ | ||
char *a = new char; | ||
delete a; | ||
} | ||
#endif | ||
|
||
// Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer | ||
// overflows in buffer size calculations. | ||
if (cchStringLength > 0x7FFFFFDF) | ||
ThrowOutOfMemory(); | ||
|
||
SIZE_T ObjectSize = PtrAlign(Utf8StringObject::GetSize(cchStringLength)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this allocates more than required. I have noticed that we may have the same problem in regular string. For example:
Do you know why it is the case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have checked CoreRT. CoreRT allocates 0x20 bytes in this case, so there is definitely something pretty broken. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't there some padding between size and content? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is padding for regular arrays (so that all arrays have same layout even when elements are 8-byte aligned). There is no padding for strings. The content starts right after length. The extra unnecessary bytes are at the end. I remember folks went to a great length to ensure that strings do not pay the extra 4 bytes during the initial 64-bit ports. It looks like it has regressed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's coming from Presumably the same for StringObject, though I want to keep anything like that separate from this PR. We haven't yet agreed to merge this into master. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #17876 has the fix for the String overallocation problem. |
||
_ASSERTE(ObjectSize > cchStringLength); | ||
|
||
SetTypeHandleOnThreadForAlloc(TypeHandle(g_pUtf8StringClass)); | ||
|
||
orObject = (Utf8StringObject *)Alloc(ObjectSize, FALSE, FALSE); | ||
|
||
// Object is zero-init already | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This includes the null terminator being zero-inited, I suppose? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep. |
||
_ASSERTE(orObject->HasEmptySyncBlockInfo()); | ||
|
||
// Initialize Object | ||
orObject->SetMethodTable(g_pUtf8StringClass); | ||
orObject->SetLength(cchStringLength); | ||
|
||
// Objects at or above LARGE_OBJECT_SIZE are additionally published to the GC heap. | ||
if (ObjectSize >= LARGE_OBJECT_SIZE) | ||
{ | ||
GCHeapUtilities::GetGCHeap()->PublishObject((BYTE*)orObject); | ||
} | ||
|
||
// Notify the profiler of the allocation | ||
if (TrackAllocations()) | ||
{ | ||
OBJECTREF objref = ObjectToOBJECTREF((Object*)orObject); | ||
GCPROTECT_BEGIN(objref); | ||
ProfilerObjectAllocatedCallback(objref, (ClassID)orObject->GetTypeHandle().AsPtr()); | ||
GCPROTECT_END(); | ||
|
||
// Reload the raw pointer from the protected reference. | ||
// NOTE(review): presumably needed because the callback may trigger a GC that moves the object -- confirm. | ||
orObject = (Utf8StringObject *)OBJECTREFToObject(objref); | ||
} | ||
|
||
#ifdef FEATURE_EVENT_TRACE | ||
// Send ETW event for allocation | ||
if (ETW::TypeSystemLog::IsHeapAllocEventEnabled()) | ||
{ | ||
ETW::TypeSystemLog::SendObjectAllocatedEvent(orObject); | ||
} | ||
#endif // FEATURE_EVENT_TRACE | ||
|
||
LogAlloc(ObjectSize, g_pUtf8StringClass, orObject); | ||
|
||
return orObject; | ||
} | ||
|
||
|
||
#ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION | ||
// OBJECTREF AllocateComClassObject(ComClassFactory* pComClsFac) | ||
void AllocateComClassObject(ComClassFactory* pComClsFac, OBJECTREF* ppRefClass) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we plan to specialize empty strings like they are specialized in regular strings?