From 4b042653307d8c7b4883280c792abc7e64a56dab Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Mon, 27 Nov 2023 13:22:15 +0100 Subject: [PATCH 01/12] Implement GetSortKey on hybrid mode --- .../src/Interop/Interop.Collation.iOS.cs | 3 + .../System/Globalization/CompareInfo.Icu.cs | 72 +++++++++++++++++-- .../src/System/Globalization/CompareInfo.cs | 8 +-- .../CompareInfo/CompareInfoTests.cs | 2 +- .../System.Globalization.Tests.csproj | 1 + .../System.Globalization.Native/entrypoints.c | 1 + .../pal_collation.h | 10 ++- .../pal_collation.m | 47 ++++++++++++ 8 files changed, 131 insertions(+), 13 deletions(-) diff --git a/src/libraries/Common/src/Interop/Interop.Collation.iOS.cs b/src/libraries/Common/src/Interop/Interop.Collation.iOS.cs index 70e907efa68a10..2dba31ddcd70ee 100644 --- a/src/libraries/Common/src/Interop/Interop.Collation.iOS.cs +++ b/src/libraries/Common/src/Interop/Interop.Collation.iOS.cs @@ -29,5 +29,8 @@ internal static partial class Globalization [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_StartsWithNative", StringMarshalling = StringMarshalling.Utf16)] [MethodImpl(MethodImplOptions.NoInlining)] internal static unsafe partial int StartsWithNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options); + + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_GetSortKeyNative", StringMarshalling = StringMarshalling.Utf16)] + internal static unsafe partial int GetSortKeyNative(string localeName, int lNameLen, char* str, int strLength, byte* sortKey, int sortKeyLength, CompareOptions options); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index 7a4fb0289c464b..14d75a367fdc01 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -700,14 +700,36 @@ private unsafe SortKey IcuCreateSortKey(string source, CompareOptions options) byte[] keyData; fixed (char* pSource = source) { - int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); + int sortKeyLength; +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + { + sortKeyLength = Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, null, 0, options); + } + else +#endif + { + sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); + } keyData = new byte[sortKeyLength]; fixed (byte* pSortKey = keyData) { - if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength) +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) { - throw new ArgumentException(SR.Arg_ExternalException); + if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options) != sortKeyLength) + { + throw new ArgumentException(SR.Arg_ExternalException); + } + } + else +#endif + { + if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength) + { + throw new ArgumentException(SR.Arg_ExternalException); + } } } } @@ -728,7 +750,16 @@ private unsafe int IcuGetSortKey(ReadOnlySpan source, Span destinati fixed (char* pSource = &MemoryMarshal.GetReference(source)) fixed (byte* pDest = &MemoryMarshal.GetReference(destination)) { - actualSortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pDest, destination.Length, options); +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + { + actualSortKeyLength = Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, 
pDest, destination.Length, options); + } + else +#endif + { + actualSortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pDest, destination.Length, options); + } } // The check below also handles errors due to negative values / overflow being returned. @@ -758,7 +789,16 @@ private unsafe int IcuGetSortKeyLength(ReadOnlySpan source, CompareOptions fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - return Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + { + return Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, null, 0, options); + } + else +#endif + { + return Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); + } } } @@ -809,7 +849,16 @@ private unsafe int IcuGetHashCodeOfString(ReadOnlySpan source, CompareOpti { fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey)) { - sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options); +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + { + sortKeyLength = Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, pSortKey, sortKey.Length, options); + } + else +#endif + { + sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options); + } } if (sortKeyLength > sortKey.Length) // slow path for big strings @@ -823,7 +872,16 @@ private unsafe int IcuGetHashCodeOfString(ReadOnlySpan source, CompareOpti fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey)) { - sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options); +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + { + sortKeyLength = 
Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, pSortKey, sortKey.Length, options); + } + else + #endif + { + sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options); + } } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs index 1d7a313d14addf..538e4b3551ca18 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs @@ -1450,7 +1450,7 @@ public SortKey GetSortKey(string source) private SortKey CreateSortKeyCore(string source, CompareOptions options) => GlobalizationMode.UseNls ? NlsCreateSortKey(source, options) : -#if TARGET_BROWSER || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS +#if TARGET_BROWSER GlobalizationMode.Hybrid ? throw new PlatformNotSupportedException(GetPNSEText("SortKey")) : #endif @@ -1493,7 +1493,7 @@ public int GetSortKey(ReadOnlySpan source, Span destination, Compare private int GetSortKeyCore(ReadOnlySpan source, Span destination, CompareOptions options) => GlobalizationMode.UseNls ? NlsGetSortKey(source, destination, options) : -#if TARGET_BROWSER || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS +#if TARGET_BROWSER GlobalizationMode.Hybrid ? throw new PlatformNotSupportedException(GetPNSEText("SortKey")) : #endif @@ -1530,7 +1530,7 @@ public int GetSortKeyLength(ReadOnlySpan source, CompareOptions options = private int GetSortKeyLengthCore(ReadOnlySpan source, CompareOptions options) => GlobalizationMode.UseNls ? NlsGetSortKeyLength(source, options) : -#if TARGET_BROWSER || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS +#if TARGET_BROWSER GlobalizationMode.Hybrid ? 
throw new PlatformNotSupportedException(GetPNSEText("SortKey")) : #endif @@ -1607,7 +1607,7 @@ public int GetHashCode(ReadOnlySpan source, CompareOptions options) private unsafe int GetHashCodeOfStringCore(ReadOnlySpan source, CompareOptions options) => GlobalizationMode.UseNls ? NlsGetHashCodeOfString(source, options) : -#if TARGET_BROWSER || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS +#if TARGET_BROWSER GlobalizationMode.Hybrid ? throw new PlatformNotSupportedException(GetPNSEText("HashCode")) : #endif diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs index cbd011b96fa0cb..8b84c416ba11fb 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs @@ -352,7 +352,7 @@ public void SortKeyKanaTest(CompareInfo compareInfo, string string1, string stri SortKeyTest(compareInfo, string1, string2, options, expected); } - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalization))] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnBrowser))] public void SortKeyTestNotSupported() { try diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj b/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj index e5ba39e62b7a70..8290f213f117d3 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj @@ -4,6 +4,7 @@ true true $(NetCoreAppCurrent) + true 15.0 diff --git a/src/native/libs/System.Globalization.Native/entrypoints.c 
b/src/native/libs/System.Globalization.Native/entrypoints.c index 1f345991505fa4..bc173b1c2c13e9 100644 --- a/src/native/libs/System.Globalization.Native/entrypoints.c +++ b/src/native/libs/System.Globalization.Native/entrypoints.c @@ -74,6 +74,7 @@ static const Entry s_globalizationNative[] = DllImportEntry(GlobalizationNative_GetLocaleNameNative) DllImportEntry(GlobalizationNative_GetLocalesNative) DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative) + DllImportEntry(GlobalizationNative_GetSortKeyNative) DllImportEntry(GlobalizationNative_GetTimeZoneDisplayNameNative) DllImportEntry(GlobalizationNative_IndexOfNative) DllImportEntry(GlobalizationNative_IsNormalizedNative) diff --git a/src/native/libs/System.Globalization.Native/pal_collation.h b/src/native/libs/System.Globalization.Native/pal_collation.h index a8b44ba164f4a0..197da249a0a047 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.h +++ b/src/native/libs/System.Globalization.Native/pal_collation.h @@ -98,6 +98,14 @@ PALEXPORT int32_t GlobalizationNative_EndsWithNative(const uint16_t* localeName, int32_t cwSuffixLength, const uint16_t* lpSource, int32_t cwSourceLength, - int32_t options); + int32_t options); + +PALEXPORT int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, + int32_t lNameLength, + const UChar* lpStr, + int32_t cwStrLength, + uint8_t* sortKey, + int32_t cbSortKeyLength, + int32_t options); #endif diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index 0c7872725af2ae..ba621b9177506b 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -295,4 +295,51 @@ int32_t GlobalizationNative_EndsWithNative(const uint16_t* localeName, int32_t l } } +int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t lNameLength, const UChar* lpStr, int32_t cwStrLength, + 
uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options) +{ + @autoreleasepool { + if (cwStrLength == 0) + { + sortKey = malloc(1); + sortKey[0] = '\0'; + return 1; + } + NSString *sourceString = [NSString stringWithCharacters: lpStr length: cwStrLength]; + NSString *sourceStringCleaned = RemoveWeightlessCharacters(sourceString); + + NSLocale *locale = GetCurrentLocale(localeName, lNameLength); + NSStringCompareOptions comparisonOptions = options == 0 ? 0 : ConvertFromCompareOptionsToNSStringCompareOptions(options); + + // Generate a sort key for the original string based on the locale + NSString *transformedString = [sourceStringCleaned stringByFoldingWithOptions:comparisonOptions locale:locale]; + + // Convert the string to UTF-8 representation + const char *utf8Bytes = [transformedString UTF8String]; + if (utf8Bytes != NULL) { + NSUInteger utf8Length = [transformedString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; + memcpy(sortKey, utf8Bytes, utf8Length); + return utf8Length; + } + else + { + // Convert the string to UTF-16 representation + NSData *utf16Data = [transformedString dataUsingEncoding:NSUTF16StringEncoding]; + + if (utf16Data != nil) { + const uint16_t *utf16Bytes = (const uint16_t *)[utf16Data bytes]; + NSUInteger utf16Length = [utf16Data length] / sizeof(uint16_t); + + if (sortKey != NULL) { + // Convert UTF-16 to UTF-8 manually + memcpy(sortKey, utf16Bytes, utf16Length * 2); + return utf16Length * 2; + } + } + } + + return 0; + } +} + #endif From cdc0030a70329001f2191f905497cbc37fa6c754 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Mon, 27 Nov 2023 17:52:53 +0100 Subject: [PATCH 02/12] Update tests --- .../System/StringGetHashCodeTests.cs | 3 ++- .../System/StringTests.cs | 20 +++++++++---------- .../pal_collation.m | 8 +++++--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringGetHashCodeTests.cs 
b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringGetHashCodeTests.cs index 33820345f65bd5..b37ba0a5d051fb 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringGetHashCodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringGetHashCodeTests.cs @@ -89,7 +89,8 @@ public static IEnumerable GetHashCodeOrdinalIgnoreCase_TestData() { yield return new object[] { "AaBbCcDdEeFfGgHh".Insert(i, "\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */) }; yield return new object[] { "AaBbCcDdEeFfGgHh".Insert(i, "\u044D" /* CYRILLIC SMALL LETTER E */) }; - yield return new object[] { "AaBbCcDdEeFfGgHh".Insert(i, "\u0131" /* LATIN SMALL LETTER DOTLESS I */) }; + if (PlatformDetection.IsNotHybridGlobalizationOnOSX) + yield return new object[] { "AaBbCcDdEeFfGgHh".Insert(i, "\u0131" /* LATIN SMALL LETTER DOTLESS I */) }; } // Various texts copied from Microsoft's non-U.S. home pages, for further localization tests diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs index c5f876417d214c..43039a4067945e 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs @@ -232,7 +232,7 @@ public static IEnumerable Contains_String_StringComparison_TestData() yield return new object[] { "Hello", "", StringComparison.CurrentCulture, true }; yield return new object[] { "Hello", "Ell" + SoftHyphen, StringComparison.CurrentCulture, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.CurrentCulture, true }; // CurrentCultureIgnoreCase @@ -245,7 +245,7 @@ public static IEnumerable 
Contains_String_StringComparison_TestData() yield return new object[] { "", "hello", StringComparison.CurrentCultureIgnoreCase, false }; yield return new object[] { "Hello", "", StringComparison.CurrentCultureIgnoreCase, true }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) { yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.CurrentCultureIgnoreCase, true }; yield return new object[] { "Hello", "Ell" + SoftHyphen, StringComparison.CurrentCultureIgnoreCase, true }; @@ -262,7 +262,7 @@ public static IEnumerable Contains_String_StringComparison_TestData() yield return new object[] { "Hello", "", StringComparison.InvariantCulture, true }; yield return new object[] { "Hello", "Ell" + SoftHyphen, StringComparison.InvariantCulture, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.InvariantCulture, true }; // InvariantCultureIgnoreCase @@ -275,7 +275,7 @@ public static IEnumerable Contains_String_StringComparison_TestData() yield return new object[] { "", "hello", StringComparison.InvariantCultureIgnoreCase, false }; yield return new object[] { "Hello", "", StringComparison.InvariantCultureIgnoreCase, true }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) { yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.InvariantCultureIgnoreCase, true }; yield return new object[] { "Hello", "Ell" + SoftHyphen, StringComparison.InvariantCultureIgnoreCase, true }; @@ -699,7 +699,7 @@ public static IEnumerable Replace_StringComparison_TestData() yield return new object[] { "abc", "b", "d", StringComparison.CurrentCulture, "adc" }; yield 
return new object[] { "abc", "b", null, StringComparison.CurrentCulture, "ac" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.CurrentCulture, "def" }; yield return new object[] { "abc", "abc", "def", StringComparison.CurrentCultureIgnoreCase, "def" }; @@ -709,7 +709,7 @@ public static IEnumerable Replace_StringComparison_TestData() yield return new object[] { "abc", "b", "d", StringComparison.CurrentCultureIgnoreCase, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.CurrentCultureIgnoreCase, "ac" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.CurrentCultureIgnoreCase, "def" }; yield return new object[] { "abc", "abc", "def", StringComparison.Ordinal, "def" }; @@ -719,7 +719,7 @@ public static IEnumerable Replace_StringComparison_TestData() yield return new object[] { "abc", "b", "d", StringComparison.Ordinal, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.Ordinal, "ac" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.Ordinal, "abc" }; yield return new object[] { "abc", "abc", "def", StringComparison.OrdinalIgnoreCase, "def" }; @@ -730,7 +730,7 @@ public static IEnumerable Replace_StringComparison_TestData() yield return new object[] { "abc", "b", null, StringComparison.OrdinalIgnoreCase, "ac" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield 
return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.OrdinalIgnoreCase, "abc" }; yield return new object[] { "abc", "abc", "def", StringComparison.InvariantCulture, "def" }; @@ -741,7 +741,7 @@ public static IEnumerable Replace_StringComparison_TestData() yield return new object[] { "abc", "b", null, StringComparison.InvariantCulture, "ac" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.InvariantCulture, "def" }; yield return new object[] { "abc", "abc", "def", StringComparison.InvariantCultureIgnoreCase, "def" }; @@ -752,7 +752,7 @@ public static IEnumerable Replace_StringComparison_TestData() yield return new object[] { "abc", "b", null, StringComparison.InvariantCultureIgnoreCase, "ac" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) { yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.InvariantCultureIgnoreCase, "def" }; diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index ba621b9177506b..f54d2dbe363a29 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -318,6 +318,7 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t const char *utf8Bytes = [transformedString UTF8String]; if (utf8Bytes != NULL) { NSUInteger utf8Length = [transformedString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; + sortKey = (uint8_t *)malloc(utf8Length); memcpy(sortKey, utf8Bytes, utf8Length); return utf8Length; } @@ -328,12 +329,13 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t if (utf16Data != 
nil) { const uint16_t *utf16Bytes = (const uint16_t *)[utf16Data bytes]; - NSUInteger utf16Length = [utf16Data length] / sizeof(uint16_t); + NSUInteger utf8Length = ([utf16Data length] / sizeof(uint16_t)) * 2; if (sortKey != NULL) { // Convert UTF-16 to UTF-8 manually - memcpy(sortKey, utf16Bytes, utf16Length * 2); - return utf16Length * 2; + sortKey = (uint8_t *)malloc(utf8Length); + memcpy(sortKey, utf16Bytes, utf8Length); + return utf8Length; } } } From e03f411c0eb88abf831138242b74c2f547e0cc00 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Tue, 28 Nov 2023 15:41:44 +0100 Subject: [PATCH 03/12] Refactor the function and run more tests --- .../features/globalization-hybrid-mode.md | 2 +- .../Common/tests/Tests/System/StringTests.cs | 57 +++++++++++-------- .../System.Globalization.Tests.csproj | 1 - .../Hybrid/System.Runtime.IOS.Tests.csproj | 4 ++ .../System/StringTests.cs | 2 +- .../pal_collation.m | 38 ++++++------- 6 files changed, 57 insertions(+), 47 deletions(-) diff --git a/docs/design/features/globalization-hybrid-mode.md b/docs/design/features/globalization-hybrid-mode.md index 7966be7224710b..44e4fd5d16c040 100644 --- a/docs/design/features/globalization-hybrid-mode.md +++ b/docs/design/features/globalization-hybrid-mode.md @@ -463,7 +463,7 @@ Affected public APIs: - CompareInfo.GetSortKeyLength - CompareInfo.GetHashCode -Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`. 
+Implemented using [stringByFoldingWithOptions:locale:](https://developer.apple.com/documentation/foundation/nsstring/1413779-stringbyfoldingwithoptions) ## Case change diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs index 119e4b7d1af447..d993519457bc75 100644 --- a/src/libraries/Common/tests/Tests/System/StringTests.cs +++ b/src/libraries/Common/tests/Tests/System/StringTests.cs @@ -1009,6 +1009,7 @@ public static void MakeSureNoCompareToChecksGoOutOfRange_StringComparison() } [Fact] + [ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnOSX))] public static void CompareToNoMatch_StringComparison() { for (int length = 1; length < 150; length++) @@ -1037,7 +1038,6 @@ public static void CompareToNoMatch_StringComparison() Assert.Equal( Math.Sign(string.Compare(firstSpan.ToString(), secondSpan.ToString(), StringComparison.OrdinalIgnoreCase)), Math.Sign(firstSpan.CompareTo(secondSpan, StringComparison.OrdinalIgnoreCase))); - Assert.Equal( string.Compare(firstSpan.ToString(), secondSpan.ToString(), StringComparison.CurrentCulture), firstSpan.CompareTo(secondSpan, StringComparison.CurrentCulture)); @@ -1283,6 +1283,7 @@ public static void ContainsMatchDifferentSpans_StringComparison() } [Fact] + [ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnOSX))] public static void ContainsNoMatch_StringComparison() { for (int length = 1; length < 150; length++) @@ -1660,7 +1661,7 @@ public static IEnumerable EndsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.CurrentCulture, true }; yield return new object[] { "", "a", StringComparison.CurrentCulture, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && 
PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", "llo" + SoftHyphen, StringComparison.CurrentCulture, true }; // CurrentCultureIgnoreCase @@ -1672,7 +1673,7 @@ public static IEnumerable EndsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.CurrentCultureIgnoreCase, true }; yield return new object[] { "", "a", StringComparison.CurrentCultureIgnoreCase, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", "llo" + SoftHyphen, StringComparison.CurrentCultureIgnoreCase, true }; // InvariantCulture @@ -1685,7 +1686,7 @@ public static IEnumerable EndsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.InvariantCulture, true }; yield return new object[] { "", "a", StringComparison.InvariantCulture, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", "llo" + SoftHyphen, StringComparison.InvariantCulture, true }; // InvariantCultureIgnoreCase @@ -1697,7 +1698,7 @@ public static IEnumerable EndsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.InvariantCultureIgnoreCase, true }; yield return new object[] { "", "a", StringComparison.InvariantCultureIgnoreCase, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", "llo" + SoftHyphen, StringComparison.InvariantCultureIgnoreCase, true }; // Ordinal @@ -2109,6 +2110,7 @@ public static void EndsWithMatchDifferentSpans_StringComparison() } [Fact] + [ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), 
nameof(PlatformDetection.IsHybridGlobalizationOnOSX))] public static void EndsWithNoMatch_StringComparison() { for (int length = 1; length < 150; length++) @@ -2151,12 +2153,15 @@ public static void EndsWithNoMatch_StringComparison() Assert.False(firstSpan.EndsWith(secondSpan, StringComparison.OrdinalIgnoreCase)); // Different behavior depending on OS - Assert.Equal( - firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCulture), - firstSpan.EndsWith(secondSpan, StringComparison.CurrentCulture)); - Assert.Equal( - firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), - firstSpan.EndsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); + if (PlatformDetection.IsNotHybridGlobalizationOnOSX) + { + Assert.Equal( + firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCulture), + firstSpan.EndsWith(secondSpan, StringComparison.CurrentCulture)); + Assert.Equal( + firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), + firstSpan.EndsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); + } Assert.Equal( firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.InvariantCulture), firstSpan.EndsWith(secondSpan, StringComparison.InvariantCulture)); @@ -3194,7 +3199,7 @@ public static void IndexOf_TurkishI_EnglishUSCulture() } } - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization))] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalizationAndNotHybrid))] [ActiveIssue("https://github.com/dotnet/runtime/issues/60568", TestPlatforms.Android | TestPlatforms.LinuxBionic)] public static void IndexOf_HungarianDoubleCompression_HungarianCulture() { @@ -4849,7 +4854,7 @@ public static IEnumerable StartsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.CurrentCulture, true }; yield return new object[] { "", 
"hello", StringComparison.CurrentCulture, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", SoftHyphen + "Hel", StringComparison.CurrentCulture, true }; // CurrentCultureIgnoreCase @@ -4861,7 +4866,7 @@ public static IEnumerable StartsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.CurrentCultureIgnoreCase, true }; yield return new object[] { "", "hello", StringComparison.CurrentCultureIgnoreCase, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", SoftHyphen + "Hel", StringComparison.CurrentCultureIgnoreCase, true }; // InvariantCulture @@ -4873,7 +4878,7 @@ public static IEnumerable StartsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.InvariantCulture, true }; yield return new object[] { "", "hello", StringComparison.InvariantCulture, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", SoftHyphen + "Hel", StringComparison.InvariantCulture, true }; // InvariantCultureIgnoreCase @@ -4885,7 +4890,7 @@ public static IEnumerable StartsWith_StringComparison_TestData() yield return new object[] { "", "", StringComparison.InvariantCultureIgnoreCase, true }; yield return new object[] { "", "hello", StringComparison.InvariantCultureIgnoreCase, false }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) yield return new object[] { "Hello", SoftHyphen + "Hel", StringComparison.InvariantCultureIgnoreCase, true }; // Ordinal @@ -5342,6 +5347,7 @@ 
private static IEnumerable ToLower_Culture_TestData() } [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization))] + [ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnOSX))] public static void Test_ToLower_Culture() { foreach (object[] testdata in ToLower_Culture_TestData()) @@ -5857,6 +5863,7 @@ public static IEnumerable ToUpper_Culture_TestData() } [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization))] + [ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnOSX))] [MemberData(nameof(ToUpper_Culture_TestData))] public static void Test_ToUpper_Culture(string actual, string expected, CultureInfo culture) { @@ -5955,7 +5962,7 @@ public static IEnumerable ToUpper_TurkishI_InvariantCulture_MemberData new KeyValuePair('\u0130', '\u0130'), new KeyValuePair('\u0131', '\u0131')); - [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization))] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalizationAndNotHybrid))] [MemberData(nameof(ToUpper_TurkishI_InvariantCulture_MemberData))] public static void ToUpper_TurkishI_InvariantCulture(string s, string expected) { @@ -7225,6 +7232,7 @@ public static void StartsWithMatchDifferentSpans_StringComparison() } [Fact] + [ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnOSX))] public static void StartsWithNoMatch_StringComparison() { for (int length = 1; length < 150; length++) @@ -7267,12 +7275,15 @@ public static void StartsWithNoMatch_StringComparison() Assert.False(firstSpan.StartsWith(secondSpan, StringComparison.OrdinalIgnoreCase)); // Different behavior depending on OS - Assert.Equal( - 
firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCulture), - firstSpan.StartsWith(secondSpan, StringComparison.CurrentCulture)); - Assert.Equal( - firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), - firstSpan.StartsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); + if (PlatformDetection.IsNotHybridGlobalizationOnOSX) + { + Assert.Equal( + firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCulture), + firstSpan.StartsWith(secondSpan, StringComparison.CurrentCulture)); + Assert.Equal( + firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), + firstSpan.StartsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); + } Assert.Equal( firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.InvariantCulture), firstSpan.StartsWith(secondSpan, StringComparison.InvariantCulture)); diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj b/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj index 8290f213f117d3..e5ba39e62b7a70 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/System.Globalization.Tests.csproj @@ -4,7 +4,6 @@ true true $(NetCoreAppCurrent) - true 15.0 diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/Hybrid/System.Runtime.IOS.Tests.csproj b/src/libraries/System.Runtime/tests/System.Runtime.Tests/Hybrid/System.Runtime.IOS.Tests.csproj index ca2e3b7856cda5..0eb9e1d6a4942a 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/Hybrid/System.Runtime.IOS.Tests.csproj +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/Hybrid/System.Runtime.IOS.Tests.csproj @@ -15,5 +15,9 @@ + + + + diff --git 
a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs index 43039a4067945e..507c50dfda3ca8 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs @@ -821,7 +821,7 @@ public static IEnumerable Replace_StringComparisonCulture_TestData() yield return new object[] { "abc", "abc", "def", true, CultureInfo.InvariantCulture, "def" }; yield return new object[] { "abc", "ABC", "def", true, CultureInfo.InvariantCulture, "def" }; - if (PlatformDetection.IsNotInvariantGlobalization) + if (PlatformDetection.IsNotInvariantGlobalization && PlatformDetection.IsNotHybridGlobalizationOnOSX) { yield return new object[] { "abc", "abc" + SoftHyphen, "def", false, null, "def" }; yield return new object[] { "abc", "abc" + SoftHyphen, "def", true, null, "def" }; diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index f54d2dbe363a29..e8a95f132788d5 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -301,7 +301,8 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t @autoreleasepool { if (cwStrLength == 0) { - sortKey = malloc(1); + if (sortKey == NULL) + sortKey = malloc(1); sortKey[0] = '\0'; return 1; } @@ -316,28 +317,23 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t // Convert the string to UTF-8 representation const char *utf8Bytes = [transformedString UTF8String]; + NSData *dataToUse = nil; + NSUInteger utf8Length = 0; if (utf8Bytes != NULL) { - NSUInteger utf8Length = [transformedString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; - sortKey = (uint8_t *)malloc(utf8Length); - memcpy(sortKey, utf8Bytes, utf8Length); - 
return utf8Length; - } - else - { + utf8Length = [transformedString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; + dataToUse = [NSData dataWithBytes:utf8Bytes length:utf8Length]; + } else { // Convert the string to UTF-16 representation - NSData *utf16Data = [transformedString dataUsingEncoding:NSUTF16StringEncoding]; - - if (utf16Data != nil) { - const uint16_t *utf16Bytes = (const uint16_t *)[utf16Data bytes]; - NSUInteger utf8Length = ([utf16Data length] / sizeof(uint16_t)) * 2; - - if (sortKey != NULL) { - // Convert UTF-16 to UTF-8 manually - sortKey = (uint8_t *)malloc(utf8Length); - memcpy(sortKey, utf16Bytes, utf8Length); - return utf8Length; - } - } + dataToUse = [transformedString dataUsingEncoding:NSUTF16StringEncoding]; + utf8Length = ([dataToUse length] / sizeof(uint16_t)) * 2; + } + + if (dataToUse != nil) { + const uint8_t *bytesToCopy = (const uint8_t *)[dataToUse bytes]; + if (sortKey == NULL) + sortKey = (uint8_t *)malloc(utf8Length); + memcpy(sortKey, bytesToCopy, utf8Length); + return utf8Length; } return 0; From a97a48b41356c950ab770a947e36e47d679eeae5 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Tue, 28 Nov 2023 15:46:24 +0100 Subject: [PATCH 04/12] Minor update --- .../Common/tests/Tests/System/StringTests.cs | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs index d993519457bc75..f151ee10db8efa 100644 --- a/src/libraries/Common/tests/Tests/System/StringTests.cs +++ b/src/libraries/Common/tests/Tests/System/StringTests.cs @@ -1038,6 +1038,7 @@ public static void CompareToNoMatch_StringComparison() Assert.Equal( Math.Sign(string.Compare(firstSpan.ToString(), secondSpan.ToString(), StringComparison.OrdinalIgnoreCase)), Math.Sign(firstSpan.CompareTo(secondSpan, StringComparison.OrdinalIgnoreCase))); + Assert.Equal( string.Compare(firstSpan.ToString(), secondSpan.ToString(), 
StringComparison.CurrentCulture), firstSpan.CompareTo(secondSpan, StringComparison.CurrentCulture)); @@ -2153,15 +2154,12 @@ public static void EndsWithNoMatch_StringComparison() Assert.False(firstSpan.EndsWith(secondSpan, StringComparison.OrdinalIgnoreCase)); // Different behavior depending on OS - if (PlatformDetection.IsNotHybridGlobalizationOnOSX) - { - Assert.Equal( - firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCulture), - firstSpan.EndsWith(secondSpan, StringComparison.CurrentCulture)); - Assert.Equal( - firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), - firstSpan.EndsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); - } + Assert.Equal( + firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCulture), + firstSpan.EndsWith(secondSpan, StringComparison.CurrentCulture)); + Assert.Equal( + firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), + firstSpan.EndsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); Assert.Equal( firstSpan.ToString().EndsWith(secondSpan.ToString(), StringComparison.InvariantCulture), firstSpan.EndsWith(secondSpan, StringComparison.InvariantCulture)); @@ -7275,15 +7273,12 @@ public static void StartsWithNoMatch_StringComparison() Assert.False(firstSpan.StartsWith(secondSpan, StringComparison.OrdinalIgnoreCase)); // Different behavior depending on OS - if (PlatformDetection.IsNotHybridGlobalizationOnOSX) - { - Assert.Equal( - firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCulture), - firstSpan.StartsWith(secondSpan, StringComparison.CurrentCulture)); - Assert.Equal( - firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), - firstSpan.StartsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); - } + Assert.Equal( + firstSpan.ToString().StartsWith(secondSpan.ToString(), 
StringComparison.CurrentCulture), + firstSpan.StartsWith(secondSpan, StringComparison.CurrentCulture)); + Assert.Equal( + firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.CurrentCultureIgnoreCase), + firstSpan.StartsWith(secondSpan, StringComparison.CurrentCultureIgnoreCase)); Assert.Equal( firstSpan.ToString().StartsWith(secondSpan.ToString(), StringComparison.InvariantCulture), firstSpan.StartsWith(secondSpan, StringComparison.InvariantCulture)); From b37beba81f90f07672e37821c0efe034fbf87aa2 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Wed, 29 Nov 2023 14:39:43 +0100 Subject: [PATCH 05/12] update GetSortKeyNative function --- .../src/System/Globalization/CompareInfo.Icu.cs | 2 +- .../libs/System.Globalization.Native/pal_collation.m | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index 14d75a367fdc01..6562b8e535e97a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -718,7 +718,7 @@ private unsafe SortKey IcuCreateSortKey(string source, CompareOptions options) #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) { - if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options) != sortKeyLength) + if (Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, null, 0, options) != sortKeyLength) { throw new ArgumentException(SR.Arg_ExternalException); } diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index e8a95f132788d5..819226e8002c70 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ 
b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -301,9 +301,8 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t @autoreleasepool { if (cwStrLength == 0) { - if (sortKey == NULL) - sortKey = malloc(1); - sortKey[0] = '\0'; + if (sortKey != NULL) + sortKey[0] = '\0'; return 1; } NSString *sourceString = [NSString stringWithCharacters: lpStr length: cwStrLength]; @@ -330,9 +329,8 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t if (dataToUse != nil) { const uint8_t *bytesToCopy = (const uint8_t *)[dataToUse bytes]; - if (sortKey == NULL) - sortKey = (uint8_t *)malloc(utf8Length); - memcpy(sortKey, bytesToCopy, utf8Length); + if (sortKey != NULL) + memcpy(sortKey, bytesToCopy, utf8Length); return utf8Length; } From 33049cf4470f093c5e8e792b0bacf9c76876694b Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Wed, 29 Nov 2023 16:32:12 +0100 Subject: [PATCH 06/12] Disable test only for hybrid mode on osx --- src/libraries/Common/tests/Tests/System/StringTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs index f151ee10db8efa..c0b31d5d352c32 100644 --- a/src/libraries/Common/tests/Tests/System/StringTests.cs +++ b/src/libraries/Common/tests/Tests/System/StringTests.cs @@ -3197,7 +3197,7 @@ public static void IndexOf_TurkishI_EnglishUSCulture() } } - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalizationAndNotHybrid))] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization), nameof(PlatformDetection.IsNotHybridGlobalizationOnOSX))] [ActiveIssue("https://github.com/dotnet/runtime/issues/60568", TestPlatforms.Android | TestPlatforms.LinuxBionic)] public static void IndexOf_HungarianDoubleCompression_HungarianCulture() { @@ -5960,7 +5960,7 @@ public static IEnumerable 
ToUpper_TurkishI_InvariantCulture_MemberData new KeyValuePair('\u0130', '\u0130'), new KeyValuePair('\u0131', '\u0131')); - [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalizationAndNotHybrid))] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization), nameof(PlatformDetection.IsNotHybridGlobalizationOnOSX))] [MemberData(nameof(ToUpper_TurkishI_InvariantCulture_MemberData))] public static void ToUpper_TurkishI_InvariantCulture(string s, string expected) { From 3b72c25600da0c96c81ec2efa88012047aa9a8b5 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Thu, 30 Nov 2023 12:09:43 +0100 Subject: [PATCH 07/12] Enable SortKey tests --- .../CompareInfo/CompareInfoTests.cs | 197 +++++++++--------- .../pal_collation.m | 2 +- 2 files changed, 103 insertions(+), 96 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs index 8b84c416ba11fb..bb25720bbff101 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs @@ -114,72 +114,75 @@ public static IEnumerable SortKey_Kana_TestData() public static IEnumerable SortKey_TestData() { - CompareOptions ignoreKanaIgnoreWidthIgnoreCase = CompareOptions.IgnoreKanaType | CompareOptions.IgnoreWidth | CompareOptions.IgnoreCase; - yield return new object[] { s_invariantCompare, "\u3042", "\u30A2", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u3042", "\uFF71", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - - yield return new object[] { s_invariantCompare, "\u304D\u3083", "\u30AD\u30E3", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u304D\u3083", "\u30AD\u3083", 
ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u304D \u3083", "\u30AD\u3083", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3044", "I", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - - yield return new object[] { s_invariantCompare, "a", "A", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "a", "\uFF41", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23\uFF24\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23D\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "ABCDE", "a\uFF22\uFF23D\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF41\uFF42\uFF23D\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - - yield return new object[] { s_invariantCompare, "\u6FA4", "\u6CA2", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - - yield return new object[] { s_invariantCompare, "\u3070\u3073\u3076\u3079\u307C", "\u30D0\u30D3\u30D6\u30D9\u30DC", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u3070\u3073\u3076\u3079\u307C", "\u30D0\u30D3\u3076\u30D9\u30DC", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3079\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - 
yield return new object[] { s_invariantCompare, "\u3071\u3074\u30D7\u307A", "\uFF8B\uFF9F\uFF8C\uFF9F", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3070\uFF8E\uFF9E\u30D6", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C\u3079\u307C", "\u3079\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - - yield return new object[] { s_invariantCompare, "ABDDE", "D", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF43D", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "ABCDE", "c", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u3060", "\u305F", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u30BF", "\uFF80", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - - yield return new object[] { s_invariantCompare, "\u68EE\u9D0E\u5916", "\u68EE\u9DD7\u5916", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u68EE\u9DD7\u5916", "\u68EE\u9DD7\u5916", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "\u2019", "'", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "", "'", ignoreKanaIgnoreWidthIgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u4E00", "\uFF11", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { 
s_invariantCompare, "\u2160", "\uFF11", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - - yield return new object[] { s_invariantCompare, "0", "\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "10", "1\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "9999\uFF1910", "1\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "9999\uFF191010", "1\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - - yield return new object[] { s_invariantCompare, "'\u3000'", "' '", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\uFF1B", ";", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\uFF08", "(", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\uFF70", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\uFF0D", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\u30FC", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\u2015", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\u2010", ignoreKanaIgnoreWidthIgnoreCase, 1 }; - - yield return new object[] { s_invariantCompare, "/", "\uFF0F", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "\"", "\uFF02", ignoreKanaIgnoreWidthIgnoreCase, 0 }; - - if (!PlatformDetection.IsWindows7) + if (PlatformDetection.IsNotHybridGlobalizationOnOSX) { - // For the below string, LCMapStringEx and CompareStringEx on Windows 7 return inconsistent results. - // We'll only run this test case on Win8+ or on non-Windows machines. 
- yield return new object[] { s_invariantCompare, "'", "\uFF07", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + CompareOptions ignoreKanaIgnoreWidthIgnoreCase = CompareOptions.IgnoreKanaType | CompareOptions.IgnoreWidth | CompareOptions.IgnoreCase; + yield return new object[] { s_invariantCompare, "\u3042", "\u30A2", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u3042", "\uFF71", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + + yield return new object[] { s_invariantCompare, "\u304D\u3083", "\u30AD\u30E3", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u304D\u3083", "\u30AD\u3083", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u304D \u3083", "\u30AD\u3083", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3044", "I", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + + yield return new object[] { s_invariantCompare, "a", "A", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "a", "\uFF41", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23\uFF24\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23D\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "ABCDE", "a\uFF22\uFF23D\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF41\uFF42\uFF23D\uFF25", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + + yield return new object[] { s_invariantCompare, "\u6FA4", "\u6CA2", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + + yield return new object[] { s_invariantCompare, "\u3070\u3073\u3076\u3079\u307C", "\u30D0\u30D3\u30D6\u30D9\u30DC", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u3070\u3073\u3076\u3079\u307C", 
"\u30D0\u30D3\u3076\u30D9\u30DC", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3079\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3071\u3074\u30D7\u307A", "\uFF8B\uFF9F\uFF8C\uFF9F", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3070\uFF8E\uFF9E\u30D6", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C\u3079\u307C", "\u3079\uFF8E\uFF9E", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3070\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + + yield return new object[] { s_invariantCompare, "ABDDE", "D", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF43D", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "ABCDE", "c", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u3060", "\u305F", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u30BF", "\uFF80", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + + yield return new object[] { s_invariantCompare, 
"\u68EE\u9D0E\u5916", "\u68EE\u9DD7\u5916", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u68EE\u9DD7\u5916", "\u68EE\u9DD7\u5916", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "\u2019", "'", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "", "'", ignoreKanaIgnoreWidthIgnoreCase, -1 }; + yield return new object[] { s_invariantCompare, "\u4E00", "\uFF11", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "\u2160", "\uFF11", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + + yield return new object[] { s_invariantCompare, "0", "\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "10", "1\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "9999\uFF1910", "1\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "9999\uFF191010", "1\uFF10", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + + yield return new object[] { s_invariantCompare, "'\u3000'", "' '", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\uFF1B", ";", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\uFF08", "(", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u30FC", "\uFF70", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u30FC", "\uFF0D", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, "\u30FC", "\u30FC", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\u30FC", "\u2015", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + yield return new object[] { s_invariantCompare, 
"\u30FC", "\u2010", ignoreKanaIgnoreWidthIgnoreCase, 1 }; + + yield return new object[] { s_invariantCompare, "/", "\uFF0F", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + yield return new object[] { s_invariantCompare, "\"", "\uFF02", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + + if (!PlatformDetection.IsWindows7) + { + // For the below string, LCMapStringEx and CompareStringEx on Windows 7 return inconsistent results. + // We'll only run this test case on Win8+ or on non-Windows machines. + yield return new object[] { s_invariantCompare, "'", "\uFF07", ignoreKanaIgnoreWidthIgnoreCase, 0 }; + } } yield return new object[] { s_invariantCompare, "\u3042", "\u30A1", CompareOptions.None, s_expectedHiraganaToKatakanaCompare }; @@ -190,12 +193,12 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u304D \u3083", "\u30AD\u3083", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3044", "I", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "a", "A", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "a", "A", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, "a", "\uFF41", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23\uFF24\uFF25", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23D\uFF25", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, new string('a', 5555), new string('a', 5554) + "b", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF41\uFF42\uFF23D\uFF25", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF41\uFF42\uFF23D\uFF25", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : 1 }; yield return new object[] { s_invariantCompare, "\u6FA4", "\u6CA2", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\u3070\u3073\u3076\u3079\u307C", "\u30D0\u30D3\u30D6\u30D9\u30DC", CompareOptions.None, s_expectedHiraganaToKatakanaCompare }; @@ -206,7 +209,7 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3079\uFF8E\uFF9E", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3071\u3074\u30D7\u307A", "\uFF8B\uFF9F\uFF8C\uFF9F", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3070\uFF8E\uFF9E\u30D6", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3070\uFF8E\uFF9E\u30D6", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C\u3079\u307C", "\u3079\uFF8E\uFF9E", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3070\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", CompareOptions.None, -1 }; @@ -225,8 +228,8 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u2019", "'", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "", "'", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "\u4E00", "\uFF11", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u2160", "\uFF11", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "\u4E00", "\uFF11", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : 1 }; + yield return new object[] { s_invariantCompare, "\u2160", "\uFF11", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; yield return new object[] { s_invariantCompare, "0", "\uFF10", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "10", "1\uFF10", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "1\uFF10", "1\uFF10", CompareOptions.None, 0 }; @@ -236,7 +239,7 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "'\u3000'", "' '", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\uFF1B", ";", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\uFF08", "(", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\uFF0D", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "\u30FC", "\uFF0D", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : 1 }; yield return new object[] { s_invariantCompare, "\u30FC", "\u30FC", CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "\u30FC", "\u2015", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\u30FC", "\u2010", CompareOptions.None, 1 }; @@ -248,59 +251,63 @@ public static IEnumerable SortKey_TestData() // Turkish yield return new object[] { s_turkishCompare, "i", "I", CompareOptions.None, 1 }; // Android has its own ICU, which doesn't work well with tr - if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic) + if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic && PlatformDetection.IsNotHybridGlobalizationOnOSX) { yield return new object[] { s_turkishCompare, "i", "I", CompareOptions.IgnoreCase, 1 }; yield return new object[] { s_turkishCompare, "i", "\u0130", CompareOptions.IgnoreCase, 0 }; } yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "i", "I", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "i", "I", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, "i", "I", CompareOptions.IgnoreCase, 0 }; yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.IgnoreCase, -1 }; yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.None, 0 }; - yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
- 1 : 1 }; yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.IgnoreCase, 0 }; yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, 0 }; - yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.StringSort, -1 }; - yield return new object[] { s_invariantCompare, new string('a', 5555), new string('a', 5555), CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, -1 }; + yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; - yield return new object[] { s_invariantCompare, "\uFF9E", "\u3099", CompareOptions.IgnoreNonSpace, 0 }; - yield return new object[] { s_invariantCompare, "\uFF9E", "\u3099", CompareOptions.IgnoreCase, 0 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.IgnoreWidth, 0 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.IgnoreCase, -1 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "\u00A2", "\uFFE0", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "$", "&", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "\uFF65", "\u30FB", CompareOptions.IgnoreSymbols, 0 }; yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreWidth, 0 }; yield return new 
object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreWidth, 0 }; - yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreSymbols, s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreCase, s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreNonSpace, s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.None, s_expectedHalfToFullFormsComparison }; - yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", CompareOptions.IgnoreKanaType, 0 }; yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", CompareOptions.IgnoreCase, s_expectedHiraganaToKatakanaCompare }; - yield return new object[] { s_invariantCompare, "c", "C", CompareOptions.IgnoreKanaType, -1 }; // Spanish yield return new object[] { new CultureInfo("es-ES").CompareInfo, "llegar", "lugar", CompareOptions.None, -1 }; - // Zero-weight code points - // In both NLS (Windows) and ICU the code point U+200C ZERO WIDTH NON-JOINER has a zero weight, - // so it's compared as equal to the empty string. This means that we can't special-case GetHashCode("") - // and return a fixed value; we actually need to call the underlying OS or ICU API to calculate the sort key. 
- yield return new object[] { s_invariantCompare, "", "\u200c", CompareOptions.None, 0 }; + if (PlatformDetection.IsNotHybridGlobalizationOnOSX) + { + yield return new object[] { s_invariantCompare, "\uFF9E", "\u3099", CompareOptions.IgnoreNonSpace, 0 }; + yield return new object[] { s_invariantCompare, "\uFF9E", "\u3099", CompareOptions.IgnoreCase, 0 }; + + yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", CompareOptions.IgnoreKanaType, 0 }; + yield return new object[] { s_invariantCompare, "c", "C", CompareOptions.IgnoreKanaType, -1 }; + // Zero-weight code points + // In both NLS (Windows) and ICU the code point U+200C ZERO WIDTH NON-JOINER has a zero weight, + // so it's compared as equal to the empty string. This means that we can't special-case GetHashCode("") + // and return a fixed value; we actually need to call the underlying OS or ICU API to calculate the sort key. + yield return new object[] { s_invariantCompare, "", "\u200c", CompareOptions.None, 0 }; + + yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.StringSort, -1 }; + yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "\u00A2", "\uFFE0", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "$", "&", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "\uFF65", "\u30FB", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreSymbols, s_expectedHalfToFullFormsComparison }; + } } public static IEnumerable IndexOf_TestData() @@ -395,7 +402,7 @@ public void SortKeyTestNotSupported() private static bool WindowsVersionHasTheCompareStringRegression => PlatformDetection.IsNlsGlobalization && CompareStringEx("", 
NORM_LINGUISTIC_CASING, "", 0, "\u200C", 1, IntPtr.Zero, IntPtr.Zero, 0) != 2; - [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalization))] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))] [MemberData(nameof(SortKey_TestData))] public void SortKeyTest(CompareInfo compareInfo, string string1, string string2, CompareOptions options, int expectedSign) { @@ -405,7 +412,7 @@ public void SortKeyTest(CompareInfo compareInfo, string string1, string string2, Assert.Equal(expectedSign, Math.Sign(SortKey.Compare(sk1, sk2))); Assert.Equal(expectedSign == 0, sk1.Equals(sk2)); - if (!WindowsVersionHasTheCompareStringRegression) + if (!WindowsVersionHasTheCompareStringRegression && IsNotHybridGlobalization) { Assert.Equal(Math.Sign(compareInfo.Compare(string1, string2, options)), Math.Sign(SortKey.Compare(sk1, sk2))); } @@ -444,7 +451,7 @@ unsafe static void RunSpanSortKeyTest(CompareInfo compareInfo, ReadOnlySpan Date: Thu, 30 Nov 2023 15:16:18 +0100 Subject: [PATCH 08/12] Fix build failure --- .../System.Globalization.Tests/CompareInfo/CompareInfoTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs index bb25720bbff101..51da4f84683dbf 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs @@ -412,7 +412,7 @@ public void SortKeyTest(CompareInfo compareInfo, string string1, string string2, Assert.Equal(expectedSign, Math.Sign(SortKey.Compare(sk1, sk2))); Assert.Equal(expectedSign == 0, sk1.Equals(sk2)); - if (!WindowsVersionHasTheCompareStringRegression && IsNotHybridGlobalization) + if 
(!WindowsVersionHasTheCompareStringRegression && PlatformDetection.IsNotHybridGlobalizationOnOSX) { Assert.Equal(Math.Sign(compareInfo.Compare(string1, string2, options)), Math.Sign(SortKey.Compare(sk1, sk2))); } From 3ed713f8a3e04440d318b39fe28149c515c67304 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Mon, 4 Dec 2023 11:02:56 +0100 Subject: [PATCH 09/12] Refactor GetSortKey function --- .../System/Globalization/CompareInfo.Icu.cs | 2 +- .../CompareInfo/CompareInfoTests.cs | 16 +++---- .../pal_collation.m | 42 +++++++++++++++---- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index 6562b8e535e97a..5cc1354e6b8039 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -718,7 +718,7 @@ private unsafe SortKey IcuCreateSortKey(string source, CompareOptions options) #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) { - if (Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, null, 0, options) != sortKeyLength) + if (Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength) { throw new ArgumentException(SR.Arg_ExternalException); } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs index 51da4f84683dbf..10481ddba62ac5 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs @@ -60,7 +60,7 @@ public void 
EqualsTest(CompareInfo compare1, object value, bool expected) new object[] { "", CompareOptions.None, "\u200c", CompareOptions.None, true }, // see comment at bottom of SortKey_TestData }; - [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalization))] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))] [MemberData(nameof(GetHashCodeTestData))] public void GetHashCodeTest(string source1, CompareOptions options1, string source2, CompareOptions options2, bool expected) { @@ -294,11 +294,6 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", CompareOptions.IgnoreKanaType, 0 }; yield return new object[] { s_invariantCompare, "c", "C", CompareOptions.IgnoreKanaType, -1 }; - // Zero-weight code points - // In both NLS (Windows) and ICU the code point U+200C ZERO WIDTH NON-JOINER has a zero weight, - // so it's compared as equal to the empty string. This means that we can't special-case GetHashCode("") - // and return a fixed value; we actually need to call the underlying OS or ICU API to calculate the sort key. - yield return new object[] { s_invariantCompare, "", "\u200c", CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.IgnoreSymbols, 0 }; yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.StringSort, -1 }; @@ -308,6 +303,11 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\uFF65", "\u30FB", CompareOptions.IgnoreSymbols, 0 }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreSymbols, s_expectedHalfToFullFormsComparison }; } + // Zero-weight code points + // In both NLS (Windows) and ICU the code point U+200C ZERO WIDTH NON-JOINER has a zero weight, + // so it's compared as equal to the empty string. 
This means that we can't special-case GetHashCode("") + // and return a fixed value; we actually need to call the underlying OS or ICU API to calculate the sort key. + yield return new object[] { s_invariantCompare, "", "\u200c", CompareOptions.None, 0 }; } public static IEnumerable IndexOf_TestData() @@ -536,7 +536,7 @@ public void VersionTest() Assert.NotEqual(sv1.SortId, sv2.SortId); } - [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalization))] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))] [MemberData(nameof(GetHashCodeTestData))] public void GetHashCode_Span(string source1, CompareOptions options1, string source2, CompareOptions options2, bool expectSameHashCode) { @@ -553,7 +553,7 @@ public void GetHashCode_Span(string source1, CompareOptions options1, string sou Assert.Equal(expectSameHashCode, hashOfSource1AsSpan == hashOfSource2AsSpan); } - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalization))] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))] public void GetHashCode_NullAndEmptySpan() { // Ensure that null spans and non-null empty spans produce the same hash code. 
diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index fd16c72aced5fb..8af4165bb73e64 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -295,6 +295,21 @@ int32_t GlobalizationNative_EndsWithNative(const uint16_t* localeName, int32_t l } } +NSString* RemoveInvalidCharacters(NSString* source) +{ + NSMutableString *validString = [NSMutableString stringWithCapacity:[source length]]; + + [source enumerateSubstringsInRange:NSMakeRange(0, [source length]) + options:NSStringEnumerationByComposedCharacterSequences + usingBlock:^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop){ + // Check if the substring can be encoded in UTF-8 + if ([substring lengthOfBytesUsingEncoding:NSUTF8StringEncoding] > 0) + [validString appendString:substring]; + }]; + + return validString; +} + int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t lNameLength, const UChar* lpStr, int32_t cwStrLength, uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options) { @@ -306,7 +321,14 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t return 1; } NSString *sourceString = [NSString stringWithCharacters: lpStr length: cwStrLength]; - NSString *sourceStringCleaned = RemoveWeightlessCharacters(sourceString).precomposedStringWithCanonicalMapping;; + NSString *sourceStringCleaned = RemoveWeightlessCharacters(sourceString).precomposedStringWithCanonicalMapping; + // If the string is empty after removing weightless characters, return 1 + if(sourceStringCleaned.length == 0) + { + if (sortKey != NULL) + sortKey[0] = '\0'; + return 1; + } NSLocale *locale = GetCurrentLocale(localeName, lNameLength); NSStringCompareOptions comparisonOptions = options == 0 ? 
0 : ConvertFromCompareOptionsToNSStringCompareOptions(options); @@ -318,16 +340,20 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t const char *utf8Bytes = [transformedString UTF8String]; NSData *dataToUse = nil; NSUInteger utf8Length = 0; - if (utf8Bytes != NULL) { + if (utf8Bytes != NULL) + { utf8Length = [transformedString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; - dataToUse = [NSData dataWithBytes:utf8Bytes length:utf8Length]; - } else { - // Convert the string to UTF-16 representation - dataToUse = [transformedString dataUsingEncoding:NSUTF16StringEncoding]; - utf8Length = ([dataToUse length] / sizeof(uint16_t)) * 2; + } + else // If the string cannot be encoded in UTF-8, we need to remove the invalid characters + { + NSString *validString = RemoveInvalidCharacters(transformedString); + utf8Bytes = [validString UTF8String]; + utf8Length = [validString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; } - if (dataToUse != nil) { + dataToUse = [NSData dataWithBytes:utf8Bytes length:utf8Length]; + if (dataToUse != nil) + { const uint8_t *bytesToCopy = (const uint8_t *)[dataToUse bytes]; if (sortKey != NULL) memcpy(sortKey, bytesToCopy, utf8Length); From 32cc7f192b54c9923f54bae907a5e09ca7073e05 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan <96171496+mkhamoyan@users.noreply.github.com> Date: Mon, 4 Dec 2023 16:45:50 +0100 Subject: [PATCH 10/12] Update src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs Co-authored-by: Mitchell Hwang <16830051+mdh1418@users.noreply.github.com> --- .../src/System/Globalization/CompareInfo.Icu.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index 5cc1354e6b8039..f2c7dd254ae2ff 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -878,7 +878,7 @@ private unsafe int IcuGetHashCodeOfString(ReadOnlySpan source, CompareOpti sortKeyLength = Interop.Globalization.GetSortKeyNative(m_name, m_name.Length, pSource, source.Length, pSortKey, sortKey.Length, options); } else - #endif +#endif { sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options); } From dd9bab26cb00e8febb46f8f2adb1c989cd4676b8 Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Tue, 5 Dec 2023 13:29:08 +0100 Subject: [PATCH 11/12] Refactor GetSortKey function --- .../CompareInfo/CompareInfoTests.cs | 50 +++++++++---------- .../pal_collation.m | 46 +++-------------- 2 files changed, 33 insertions(+), 63 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs index 10481ddba62ac5..8ca1892cde0c85 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.cs @@ -192,11 +192,11 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u304D\u3083", "\u30AD\u3083", CompareOptions.None, s_expectedHiraganaToKatakanaCompare }; yield return new object[] { s_invariantCompare, "\u304D \u3083", "\u30AD\u3083", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "\u3044", "I", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "\u3044", "I", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; yield return new object[] { s_invariantCompare, "a", "A", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
1 : -1 }; - yield return new object[] { s_invariantCompare, "a", "\uFF41", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23\uFF24\uFF25", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23D\uFF25", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "a", "\uFF41", CompareOptions.None,PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23\uFF24\uFF25", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; + yield return new object[] { s_invariantCompare, "ABCDE", "\uFF21\uFF22\uFF23D\uFF25", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, new string('a', 5555), new string('a', 5554) + "b", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "ABCDE", "\uFF41\uFF42\uFF23D\uFF25", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; yield return new object[] { s_invariantCompare, "\u6FA4", "\u6CA2", CompareOptions.None, 1 }; @@ -209,7 +209,7 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3079\uFF8E\uFF9E", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3071\u3074\u30D7\u307A", "\uFF8B\uFF9F\uFF8C\uFF9F", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3070\uFF8E\uFF9E\u30D6", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : 1 }; + yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u3070\uFF8E\uFF9E\u30D6", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\u3070\u30DC\uFF8C\uFF9E\uFF8D\uFF9E\u307C\u3079\u307C", "\u3079\uFF8E\uFF9E", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u3070\uFF8C\uFF9E\uFF8D\uFF9E\u307C", "\u30D6", CompareOptions.None, -1 }; @@ -222,31 +222,31 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u68EE\u9D0E\u5916", "\u68EE\u9DD7\u5916", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u68EE\u9DD7\u5916", "\u68EE\u9DD7\u5916", CompareOptions.None, 0 }; - yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u2019", "'", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; + yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; + yield return new object[] { s_invariantCompare, "\u2019\u2019\u2019\u2019", "''''", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; + yield return new object[] { s_invariantCompare, "\u2019", "'", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : 1 }; yield return new object[] { s_invariantCompare, "", "'", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u4E00", "\uFF11", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; - yield return new object[] { s_invariantCompare, "\u2160", "\uFF11", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; - yield return new object[] { s_invariantCompare, "0", "\uFF10", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "10", "1\uFF10", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "\u2160", "\uFF11", CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "0", "\uFF10", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; + yield return new object[] { s_invariantCompare, "10", "1\uFF10", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, "1\uFF10", "1\uFF10", CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "9999\uFF1910", "1\uFF10", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "9999\uFF191010", "1\uFF10", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "'\u3000'", "' '", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\uFF1B", ";", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\uFF08", "(", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u30FC", "\uFF0D", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; + yield return new object[] { s_invariantCompare, "'\u3000'", "' '", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; + yield return new object[] { s_invariantCompare, "\uFF1B", ";", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : 1 }; + yield return new object[] { s_invariantCompare, "\uFF08", "(", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : 1 }; + yield return new object[] { s_invariantCompare, "\u30FC", "\uFF0D", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\u30FC", "\u30FC", CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "\u30FC", "\u2015", CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, "\u30FC", "\u2010", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "/", "\uFF0F", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "'", "\uFF07", CompareOptions.None, -1 }; - yield return new object[] { s_invariantCompare, "\"", "\uFF02", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "/", "\uFF0F", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; + yield return new object[] { s_invariantCompare, "'", "\uFF07", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; + yield return new object[] { s_invariantCompare, "\"", "\uFF02", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; // Turkish yield return new object[] { s_turkishCompare, "i", "I", CompareOptions.None, 1 }; @@ -256,10 +256,10 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_turkishCompare, "i", "I", CompareOptions.IgnoreCase, 1 }; yield return new object[] { s_turkishCompare, "i", "\u0130", CompareOptions.IgnoreCase, 0 }; } - yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, "i", "I", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 
1 : -1 }; yield return new object[] { s_invariantCompare, "i", "I", CompareOptions.IgnoreCase, 0 }; - yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.IgnoreCase, -1 }; yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.None, 0 }; @@ -275,12 +275,12 @@ public static IEnumerable SortKey_TestData() yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.IgnoreCase, -1 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreWidth, 0 }; - yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : -1 }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreWidth, 0 }; - yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreCase, s_expectedHalfToFullFormsComparison }; - yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreNonSpace, s_expectedHalfToFullFormsComparison }; - yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.None, s_expectedHalfToFullFormsComparison }; + yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreCase, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : s_expectedHalfToFullFormsComparison }; + yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreNonSpace, PlatformDetection.IsHybridGlobalizationOnOSX ? 
-1 : s_expectedHalfToFullFormsComparison }; + yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? -1 : s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\u3060", "\u30C0", CompareOptions.IgnoreCase, s_expectedHiraganaToKatakanaCompare }; diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index 8af4165bb73e64..727024a65bbf8a 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -295,21 +295,6 @@ int32_t GlobalizationNative_EndsWithNative(const uint16_t* localeName, int32_t l } } -NSString* RemoveInvalidCharacters(NSString* source) -{ - NSMutableString *validString = [NSMutableString stringWithCapacity:[source length]]; - - [source enumerateSubstringsInRange:NSMakeRange(0, [source length]) - options:NSStringEnumerationByComposedCharacterSequences - usingBlock:^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop){ - // Check if the substring can be encoded in UTF-8 - if ([substring lengthOfBytesUsingEncoding:NSUTF8StringEncoding] > 0) - [validString appendString:substring]; - }]; - - return validString; -} - int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t lNameLength, const UChar* lpStr, int32_t cwStrLength, uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options) { @@ -336,29 +321,14 @@ int32_t GlobalizationNative_GetSortKeyNative(const uint16_t* localeName, int32_t // Generate a sort key for the original string based on the locale NSString *transformedString = [sourceStringCleaned stringByFoldingWithOptions:comparisonOptions locale:locale]; - // Convert the string to UTF-8 representation - const char *utf8Bytes = [transformedString UTF8String]; - NSData *dataToUse = nil; - NSUInteger utf8Length = 0; - if 
(utf8Bytes != NULL) - { - utf8Length = [transformedString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; - } - else // If the string cannot be encoded in UTF-8, we need to remove the invalid characters - { - NSString *validString = RemoveInvalidCharacters(transformedString); - utf8Bytes = [validString UTF8String]; - utf8Length = [validString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; - } - - dataToUse = [NSData dataWithBytes:utf8Bytes length:utf8Length]; - if (dataToUse != nil) - { - const uint8_t *bytesToCopy = (const uint8_t *)[dataToUse bytes]; - if (sortKey != NULL) - memcpy(sortKey, bytesToCopy, utf8Length); - return utf8Length; - } + NSUInteger transformedStringBytes = [transformedString lengthOfBytesUsingEncoding: NSUTF16StringEncoding]; + if (sortKey == NULL) + return (int32_t)transformedStringBytes; + NSRange range = NSMakeRange(0, [transformedString length]); + NSUInteger usedLength = 0; + BOOL result = [transformedString getBytes:sortKey maxLength:transformedStringBytes usedLength:&usedLength encoding:NSUTF16StringEncoding options:0 range:range remainingRange:NULL]; + if (result) + return (int32_t)usedLength; return 0; } From 52243ba888f51f11267d38a21967073568c4f8df Mon Sep 17 00:00:00 2001 From: Meri Khamoyan Date: Wed, 6 Dec 2023 08:44:06 +0100 Subject: [PATCH 12/12] update documentation --- docs/design/features/globalization-hybrid-mode.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/design/features/globalization-hybrid-mode.md b/docs/design/features/globalization-hybrid-mode.md index 44e4fd5d16c040..2be75ac3150e6b 100644 --- a/docs/design/features/globalization-hybrid-mode.md +++ b/docs/design/features/globalization-hybrid-mode.md @@ -465,6 +465,8 @@ Affected public APIs: Implemeneted using [stringByFoldingWithOptions:locale:](https://developer.apple.com/documentation/foundation/nsstring/1413779-stringbyfoldingwithoptions) +Note: This implementation does not construct SortKeys like ICU ucol_getSortKey does, and might not adhere 
to the specifications of SortKey such as SortKeys from different collators not being comparable and merging sortkeys. + ## Case change