Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Switch over to managed Marvin implementation for string hashing #17029

Merged
merged 3 commits into from
Mar 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions src/classlibnative/bcltype/stringnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,6 @@
#pragma optimize("tgy", on)
#endif

inline COMNlsHashProvider * GetCurrentNlsHashProvider()
{
LIMITED_METHOD_CONTRACT;
return &COMNlsHashProvider::s_NlsHashProvider;
}

FCIMPL1(INT32, COMString::Marvin32HashString, StringObject* thisRefUNSAFE) {
FCALL_CONTRACT;

int iReturnHash = 0;

if (thisRefUNSAFE == NULL) {
FCThrow(kNullReferenceException);
}

BEGIN_SO_INTOLERANT_CODE_NOTHROW(GetThread(), FCThrow(kStackOverflowException));
iReturnHash = GetCurrentNlsHashProvider()->HashString(thisRefUNSAFE->GetBuffer(), thisRefUNSAFE->GetStringLength());
END_SO_INTOLERANT_CODE;

FC_GC_POLL_RET();

return iReturnHash;
}
FCIMPLEND

/*===============================IsFastSort===============================
**Action: Call the helper to walk the string and see if we have any high chars.
**Returns: void. The appropriate bits are set on the String.
Expand Down
3 changes: 0 additions & 3 deletions src/classlibnative/bcltype/stringnative.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ class COMString {
static FCDECL2(FC_BOOL_RET, FCTryGetTrailByte, StringObject* thisRefUNSAFE, UINT8 *pbData);
static FCDECL2(VOID, FCSetTrailByte, StringObject* thisRefUNSAFE, UINT8 bData);
#endif // FEATURE_COMINTEROP

static FCDECL1(INT32, Marvin32HashString, StringObject* thisRefUNSAFE);

};

// Revert to command line compilation flags
Expand Down
66 changes: 0 additions & 66 deletions src/classlibnative/nls/nlsinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,72 +34,6 @@
#include "nls.h"
#include "nlsinfo.h"

//
// Constant Declarations.
//

#define MAX_STRING_VALUE 512

////////////////////////////////////////////////////////////////////////////
//
// InternalGetGlobalizedHashCode
//
////////////////////////////////////////////////////////////////////////////
INT32 QCALLTYPE COMNlsInfo::InternalGetGlobalizedHashCode(INT_PTR handle, LPCWSTR localeName, LPCWSTR string, INT32 length, INT32 dwFlagsIn)
{
CONTRACTL
{
QCALL_CHECK;
PRECONDITION(CheckPointer(localeName));
PRECONDITION(CheckPointer(string, NULL_OK));
} CONTRACTL_END;

INT32 iReturnHash = 0;
BEGIN_QCALL;

int byteCount = 0;

//
// Make sure there is a string.
//
if (!string) {
COMPlusThrowArgumentNull(W("string"),W("ArgumentNull_String"));
}

DWORD dwFlags = (LCMAP_SORTKEY | dwFlagsIn);

//
// Caller has already verified that the string is not of zero length
//
// Assert if we might hit an AV in LCMapStringEx for the invariant culture.
_ASSERTE(length > 0 || (dwFlags & LCMAP_LINGUISTIC_CASING) == 0);
{
byteCount=::LCMapStringEx(handle != NULL ? NULL : localeName, dwFlags, string, length, NULL, 0, NULL, NULL, (LPARAM) handle);
}

//A count of 0 indicates that we either had an error or had a zero length string originally.
if (byteCount==0)
{
COMPlusThrow(kArgumentException, W("Arg_MustBeString"));
}

// We used to use a NewArrayHolder here, but it turns out that hurts our large # process
// scalability in ASP.Net hosting scenarios, using the quick bytes instead mostly stack
// allocates and ups throughput by 8% in 100 process case, 5% in 1000 process case
{
CQuickBytesSpecifySize<MAX_STRING_VALUE * sizeof(WCHAR)> qbBuffer;
BYTE* pByte = (BYTE*)qbBuffer.AllocThrows(byteCount);

{
::LCMapStringEx(handle != NULL ? NULL : localeName, dwFlags, string, length, (LPWSTR)pByte, byteCount, NULL,NULL, (LPARAM) handle);
}

iReturnHash = COMNlsHashProvider::s_NlsHashProvider.HashSortKey(pByte, byteCount);
}
END_QCALL;
return(iReturnHash);
}

/**
* This function returns a pointer to this table that we use in System.Globalization.EncodingTable.
* No error checking of any sort is performed. Range checking is entirely the responsibility of the managed
Expand Down
85 changes: 0 additions & 85 deletions src/inc/marvin32.h

This file was deleted.

34 changes: 21 additions & 13 deletions src/mscorlib/src/System/Globalization/CompareInfo.Unix.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand Down Expand Up @@ -732,7 +733,10 @@ private unsafe SortKey CreateSortKey(String source, CompareOptions options)

fixed (byte* pSortKey = keyData)
{
Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
if (Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength)
{
throw new ArgumentException(SR.Arg_ExternalException);
}
}
}

Expand Down Expand Up @@ -796,25 +800,29 @@ internal unsafe int GetHashCodeOfStringCore(string source, CompareOptions option

int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, null, 0, options);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably we could supply a buffer that would often be big enough and avoid the need to call Getsortkey twice.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there are many performance improvements that can be done in the globalization implementation. Not doing them in this PR to make it scoped.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hashing is possibly the most perf sensitive path in globalization? Perhaps it's worth an issue opened.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// As an optimization, for small sort keys we allocate the buffer on the stack.
if (sortKeyLength <= 256)
byte[] borrowedArr = null;
Span<byte> span = sortKeyLength <= 512 ?
stackalloc byte[512] :
(borrowedArr = ArrayPool<byte>.Shared.Rent(sortKeyLength));

fixed (byte* pSortKey = &MemoryMarshal.GetReference(span))
{
byte* pSortKey = stackalloc byte[sortKeyLength];
Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
return InternalHashSortKey(pSortKey, sortKeyLength);
if (Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength)
{
throw new ArgumentException(SR.Arg_ExternalException);
}
}

byte[] sortKey = new byte[sortKeyLength];
int hash = Marvin.ComputeHash32(span.Slice(0, sortKeyLength), Marvin.DefaultSeed);

fixed (byte* pSortKey = sortKey)
// Return the borrowed array if necessary.
if (borrowedArr != null)
{
Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
return InternalHashSortKey(pSortKey, sortKeyLength);
ArrayPool<byte>.Shared.Return(borrowedArr);
}
}

[DllImport(JitHelpers.QCall)]
private static extern unsafe int InternalHashSortKey(byte* sortKey, int sortKeyLength);
return hash;
}

private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
{
Expand Down
79 changes: 51 additions & 28 deletions src/mscorlib/src/System/Globalization/CompareInfo.Windows.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Security;
using System.Buffers;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Security;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Globalization
{
Expand Down Expand Up @@ -120,24 +121,47 @@ private unsafe int GetHashCodeOfStringCore(string source, CompareOptions options
return 0;
}

int flags = GetNativeCompareFlags(options);
int tmpHash = 0;
#if CORECLR
tmpHash = InternalGetGlobalizedHashCode(_sortHandle, _sortName, source, source.Length, flags);
#else
uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options);

fixed (char* pSource = source)
{
if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
LCMAP_HASH | (uint)flags,
int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
flags,
pSource, source.Length,
&tmpHash, sizeof(int),
null, null, _sortHandle) == 0)
null, 0,
null, null, _sortHandle);
if (sortKeyLength == 0)
{
Environment.FailFast("LCMapStringEx failed!");
throw new ArgumentException(SR.Arg_ExternalException);
}

byte[] borrowedArr = null;
Span<byte> span = sortKeyLength <= 512 ?
stackalloc byte[512] :
(borrowedArr = ArrayPool<byte>.Shared.Rent(sortKeyLength));

fixed (byte* pSortKey = &MemoryMarshal.GetReference(span))
{
if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
flags,
pSource, source.Length,
null, 0,
null, null, _sortHandle) != sortKeyLength)
{
throw new ArgumentException(SR.Arg_ExternalException);
}
}

int hash = Marvin.ComputeHash32(span.Slice(0, sortKeyLength), Marvin.DefaultSeed);

// Return the borrowed array if necessary.
if (borrowedArr != null)
{
ArrayPool<byte>.Shared.Return(borrowedArr);
}

return hash;
}
#endif
return tmpHash;
}

private static unsafe int CompareStringOrdinalIgnoreCase(char* string1, int count1, char* string2, int count2)
Expand Down Expand Up @@ -516,27 +540,32 @@ private unsafe SortKey CreateSortKey(String source, CompareOptions options)
}
else
{
uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options);

fixed (char *pSource = source)
{
int result = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
LCMAP_SORTKEY | (uint) GetNativeCompareFlags(options),
int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
flags,
pSource, source.Length,
null, 0,
null, null, _sortHandle);
if (result == 0)
if (sortKeyLength == 0)
{
throw new ArgumentException(SR.Argument_InvalidFlag, "source");
throw new ArgumentException(SR.Arg_ExternalException);
}

keyData = new byte[result];
keyData = new byte[sortKeyLength];

fixed (byte* pBytes = keyData)
{
result = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
LCMAP_SORTKEY | (uint) GetNativeCompareFlags(options),
if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
flags,
pSource, source.Length,
pBytes, keyData.Length,
null, null, _sortHandle);
null, null, _sortHandle) != sortKeyLength)
{
throw new ArgumentException(SR.Arg_ExternalException);
}
}
}
}
Expand Down Expand Up @@ -601,11 +630,5 @@ private unsafe SortVersion GetSortVersion()
nlsVersion.dwEffectiveId == 0 ? LCID : nlsVersion.dwEffectiveId,
nlsVersion.guidCustomVersion);
}

#if CORECLR
// Get a locale sensitive sort hash code from native code -- COMNlsInfo::InternalGetGlobalizedHashCode
[DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)]
private static extern int InternalGetGlobalizedHashCode(IntPtr handle, string localeName, string source, int length, int dwFlags);
#endif
}
}
Loading