-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Expose a Utf8String type. #17872
Expose a Utf8String type. #17872
Changes from 2 commits
6b90660
fabbe5d
f8639fa
bcbbbb9
e4f9a43
b5f7f59
9e13d11
624897f
78e15a2
5e800cd
10db0e3
3618be1
6cbb3f8
2cff568
7758492
663fe01
4f78043
e8e82db
a00b751
bbdcae4
a382e1d
56ea91c
4279139
a3ce65a
24a8071
e16f421
5633b36
0711ac1
1119a96
3c85d80
5907d1c
049f0ed
603193a
f6d811e
6825173
b9b8040
103fce3
f997aaa
d026198
493c27f
5eb53cc
6cf128f
dd11e73
82ec9cb
4a01601
4ff38a1
fab65e9
d6258a4
21f47d3
4d60318
39a4e2e
279a4c5
b247f32
cfe461b
2f4ad81
11af9eb
00e7d26
5e9651f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
// | ||
// File: StringNative.cpp | ||
// | ||
|
||
// | ||
// Purpose: The implementation of the Utf8String class. | ||
// | ||
|
||
// | ||
|
||
#include "common.h" | ||
|
||
#include "object.h" | ||
#include "utilcode.h" | ||
#include "excep.h" | ||
#include "frames.h" | ||
#include "field.h" | ||
#include "vars.hpp" | ||
#include "utf8stringnative.h" | ||
#include "comutilnative.h" | ||
#include "metasig.h" | ||
#include "excep.h" | ||
|
||
// Compile the string functionality with these pragma flags (equivalent of the command line /Ox flag) | ||
// Compiling this functionality differently gives us significant throughout gain in some cases. | ||
#if defined(_MSC_VER) && defined(_TARGET_X86_) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is copy&paste that does not make sense given what's in this file. |
||
#pragma optimize("tgy", on) | ||
#endif | ||
|
||
FCIMPL1(Utf8StringObject *, Utf8StringFastAllocate, DWORD length) | ||
{ | ||
FCALL_CONTRACT; | ||
|
||
UTF8STRINGREF rv = NULL; // not protected | ||
|
||
HELPER_METHOD_FRAME_BEGIN_RET_1(rv); | ||
rv = SlowAllocateUtf8String(length); | ||
HELPER_METHOD_FRAME_END(); | ||
|
||
return UTF8STRINGREFToObject(rv); | ||
} | ||
FCIMPLEND | ||
|
||
|
||
// Revert to command line compilation flags | ||
#if defined(_MSC_VER) && defined(_TARGET_X86_) | ||
#pragma optimize ("", on) | ||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
// | ||
// File: StringNative.h | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here |
||
// | ||
|
||
// | ||
// Purpose: Contains types and method signatures for the Utf8String class | ||
// | ||
|
||
// | ||
|
||
#include "fcall.h" | ||
#include "qcall.h" | ||
#include "excep.h" | ||
|
||
#ifndef _UTF8STRINGNATIVE_H_ | ||
#define _UTF8STRINGNATIVE_H_ | ||
|
||
FCDECL1(Utf8StringObject *, Utf8StringFastAllocate, DWORD length); | ||
|
||
#endif // _UTF8STRINGNATIVE_H_ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Runtime.CompilerServices; | ||
|
||
namespace System | ||
{ | ||
// This is an experimental type and not referenced from CoreFx but needs to exists and be public so we can prototype in CoreFxLab. | ||
public sealed class Utf8String | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we add interfaces ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not necessarily about interfaces. But the VM and JIT will certainly need to know more about this type to get decent performance. It goes back to my question on what you expect to get by adding this stub to CoreLib. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jkotas You're right in that we're not going to see full performance benchmarks until JIT support comes online, but this does get us at least partway there. Even when not JIT-optimized, the supplied allocation routine will still be faster and allocate less overall heap memory than calling two different ctors for a container + separate array. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could just be a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @benaadams We considered this for prototype purposes. But the end result is to have the final type be a class, and any data we'd collect in the meantime from a struct stand-in would be suspect. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That is not correct. The allocation routine in this PR is super slow naive implementation. I just tried:
Run I am with Stephen that it would be best for a rough early prototype code like this to stay in a branch and not be merged into master. FWIW, |
||
{ | ||
// Do not reorder these fields. Must match layout of Utf8StringObject in object.h. | ||
private int _length; | ||
[CLSCompliant(false)] | ||
public byte _firstByte; // TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you really need this to be public? It would be better to have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It allows the code in CoreFxLab to be written as instance members of Utf8String itself would be. Though if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, use |
||
|
||
private Utf8String() { } // Suppress creation of the public constructor. No one actually calls this. | ||
|
||
public int Length => _length; | ||
|
||
// Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes (the extra | ||
// +1 is for the NUL terminator.) | ||
[MethodImpl(MethodImplOptions.InternalCall)] | ||
public static extern Utf8String FastAllocate(int length); //TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form. | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1059,6 +1059,73 @@ STRINGREF SlowAllocateString( DWORD cchStringLength ) | |
return( ObjectToSTRINGREF(orObject) ); | ||
} | ||
|
||
UTF8STRINGREF SlowAllocateUtf8String(DWORD cchStringLength) | ||
{ | ||
CONTRACTL{ | ||
THROWS; | ||
GC_TRIGGERS; | ||
MODE_COOPERATIVE; // returns an objref without pinning it => cooperative | ||
} CONTRACTL_END; | ||
|
||
Utf8StringObject *orObject = NULL; | ||
|
||
#ifdef _DEBUG | ||
if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) | ||
{ | ||
char *a = new char; | ||
delete a; | ||
} | ||
#endif | ||
|
||
// Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer | ||
// overflows in buffer size calculations. | ||
if (cchStringLength > 0x7FFFFFDF) | ||
ThrowOutOfMemory(); | ||
|
||
SIZE_T ObjectSize = PtrAlign(Utf8StringObject::GetSize(cchStringLength)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this allocates more than required. I have noticed that we may have the same problem in regular string. For example:
Do you know why it is the case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have checked CoreRT. CoreRT allocates 0x20 bytes in this case, so there is definitely something pretty broken. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't there some padding between size and content? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is padding for regular arrays (so that all arrays have same layout even when elements are 8-byte aligned). There is no padding for strings. The content starts right after length. The extra unnecessary bytes are at the end. I remember folks went to a great length to ensure that strings do not pay the extra 4 bytes during the initial 64-bit ports. It looks like it has regressed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's coming from Presumably the same for StringObject, though I want to keep anything like that separate from this PR. We haven't yet agreed to merge this into master. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #17876 has the fix for the String overallocation problem. |
||
_ASSERTE(ObjectSize > cchStringLength); | ||
|
||
SetTypeHandleOnThreadForAlloc(TypeHandle(g_pUtf8StringClass)); | ||
|
||
orObject = (Utf8StringObject *)Alloc(ObjectSize, FALSE, FALSE); | ||
|
||
// Object is zero-init already | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This includes the null terminator being zero-inited, I suppose? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep. |
||
_ASSERTE(orObject->HasEmptySyncBlockInfo()); | ||
|
||
// Initialize Object | ||
orObject->SetMethodTable(g_pUtf8StringClass); | ||
orObject->SetUtf8StringLength(cchStringLength); | ||
|
||
if (ObjectSize >= LARGE_OBJECT_SIZE) | ||
{ | ||
GCHeapUtilities::GetGCHeap()->PublishObject((BYTE*)orObject); | ||
} | ||
|
||
// Notify the profiler of the allocation | ||
if (TrackAllocations()) | ||
{ | ||
OBJECTREF objref = ObjectToOBJECTREF((Object*)orObject); | ||
GCPROTECT_BEGIN(objref); | ||
ProfilerObjectAllocatedCallback(objref, (ClassID)orObject->GetTypeHandle().AsPtr()); | ||
GCPROTECT_END(); | ||
|
||
orObject = (Utf8StringObject *)OBJECTREFToObject(objref); | ||
} | ||
|
||
#ifdef FEATURE_EVENT_TRACE | ||
// Send ETW event for allocation | ||
if (ETW::TypeSystemLog::IsHeapAllocEventEnabled()) | ||
{ | ||
ETW::TypeSystemLog::SendObjectAllocatedEvent(orObject); | ||
} | ||
#endif // FEATURE_EVENT_TRACE | ||
|
||
LogAlloc(ObjectSize, g_pUtf8StringClass, orObject); | ||
|
||
return(ObjectToUTF8STRINGREF(orObject)); | ||
} | ||
|
||
|
||
#ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION | ||
// OBJECTREF AllocateComClassObject(ComClassFactory* pComClsFac) | ||
void AllocateComClassObject(ComClassFactory* pComClsFac, OBJECTREF* ppRefClass) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9744,6 +9744,17 @@ void MethodTableBuilder::CheckForSystemTypes() | |
|
||
pMT->SetComponentSize(2); | ||
} | ||
else if (strcmp(name, g_Utf8StringName) == 0 && strcmp(nameSpace, g_SystemNS) == 0) | ||
{ | ||
// Utf8Strings are not "normal" objects, so we need to mess with their method table a bit | ||
// so that the GC can figure out how big each string is... | ||
DWORD baseSize = ObjSizeOf(Utf8StringObject) + sizeof(BYTE); | ||
pMT->SetBaseSize(baseSize); // NULL character included | ||
|
||
GetHalfBakedClass()->SetBaseSizePadding(baseSize - bmtFP->NumInstanceFieldBytes); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
pMT->SetComponentSize(1); | ||
} | ||
else if (strcmp(name, g_CriticalFinalizerObjectName) == 0 && strcmp(nameSpace, g_ConstrainedExecutionNS) == 0) | ||
{ | ||
// To introduce a class with a critical finalizer, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -866,6 +866,7 @@ typedef DPTR(U8Array) PTR_U8Array; | |
typedef DPTR(PTRArray) PTR_PTRArray; | ||
|
||
class StringObject; | ||
class Utf8StringObject; | ||
|
||
#ifdef USE_CHECKED_OBJECTREFS | ||
typedef REF<ArrayBase> BASEARRAYREF; | ||
|
@@ -883,6 +884,7 @@ typedef REF<U8Array> U8ARRAYREF; | |
typedef REF<CHARArray> CHARARRAYREF; | ||
typedef REF<PTRArray> PTRARRAYREF; // Warning: Use PtrArray only for single dimensional arrays, not multidim arrays. | ||
typedef REF<StringObject> STRINGREF; | ||
typedef REF<Utf8StringObject> UTF8STRINGREF; | ||
|
||
#else // USE_CHECKED_OBJECTREFS | ||
|
||
|
@@ -901,6 +903,7 @@ typedef PTR_U8Array U8ARRAYREF; | |
typedef PTR_CHARArray CHARARRAYREF; | ||
typedef PTR_PTRArray PTRARRAYREF; // Warning: Use PtrArray only for single dimensional arrays, not multidim arrays. | ||
typedef PTR_StringObject STRINGREF; | ||
typedef PTR_Utf8StringObject UTF8STRINGREF; | ||
|
||
#endif // USE_CHECKED_OBJECTREFS | ||
|
||
|
@@ -935,6 +938,7 @@ typedef PTR_StringObject STRINGREF; | |
#ifdef _MSC_VER | ||
#pragma warning(disable : 4200) // disable zero-sized array warning | ||
#endif | ||
|
||
class StringObject : public Object | ||
{ | ||
#ifdef DACCESS_COMPILE | ||
|
@@ -1205,6 +1209,32 @@ class ReflectClassBaseObject : public BaseObjectWithCachedData | |
|
||
}; | ||
|
||
class Utf8StringObject : public Object | ||
{ | ||
#ifdef DACCESS_COMPILE | ||
friend class ClrDataAccess; | ||
#endif | ||
|
||
private: | ||
DWORD m_StringLength; | ||
BYTE m_Characters[0]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the String over-allocation fix, I have changed this to |
||
// GC will see a Utf8StringObject like this: | ||
// DWORD m_StringLength | ||
// BYTE m_Characters[0] | ||
// DWORD m_OptionalPadding (this is an optional field and will appear based on need) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment about |
||
|
||
public: | ||
VOID SetUtf8StringLength(DWORD len) { LIMITED_METHOD_CONTRACT; _ASSERTE(len >= 0); m_StringLength = len; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be called just SetLength, I think. The managed side has Length property, not Utf8Length property. |
||
|
||
protected: | ||
Utf8StringObject() { LIMITED_METHOD_CONTRACT; } | ||
~Utf8StringObject() { LIMITED_METHOD_CONTRACT; } | ||
|
||
public: | ||
static SIZE_T GetSize(DWORD stringLength); | ||
}; | ||
|
||
|
||
// This is the Method version of the Reflection object. | ||
// A Method has adddition information. | ||
// m_pMD - A pointer to the actual MethodDesc of the method. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does not match the actual name. Better to just delete - it is useless comment.