Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SearchValues<string> #88394

Merged
merged 43 commits into from
Aug 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
1566d85
Add SearchValues<string>
MihaZupan Jul 4, 2023
c56168d
Fix AhoCorasickNode DebuggerDisplay
MihaZupan Jul 4, 2023
68d3dfd
Typos & rewordings
MihaZupan Jul 5, 2023
515104c
Use [InlineArray]
MihaZupan Jul 5, 2023
2a5bef5
Simplify NLS variant of AhoCorasick
MihaZupan Jul 5, 2023
7dc3186
Remove 2 TODOs
MihaZupan Jul 5, 2023
3f99c76
Add a few more simple test cases
MihaZupan Jul 5, 2023
b412fd9
Add BurntSushi/aho-corasick to THIRD-PARTY-NOTICES
MihaZupan Jul 5, 2023
7f8bae2
Add more rng to stress runs
MihaZupan Jul 5, 2023
c558404
Remove DebuggerDisplay on AhoCorasickNode
MihaZupan Jul 5, 2023
15996f4
More comments around AhoCorasickNode.TryCreateJumpTable
MihaZupan Jul 5, 2023
5d4cacc
Reduce RabinKarp ctor allocations
MihaZupan Jul 8, 2023
b8757e4
Move ordinal helpers around a bit
MihaZupan Jul 8, 2023
822f5c6
Typo
MihaZupan Jul 8, 2023
2e70415
Remove IValueLength
MihaZupan Jul 8, 2023
676c461
Dedup some ICaseSensitivity.Equals loops
MihaZupan Jul 9, 2023
6e31d6e
Avoid some costs in RabinKarp for long values
MihaZupan Jul 9, 2023
38a6436
Remove now-unused maxValueLength field
MihaZupan Jul 9, 2023
2f7a966
Add comment about MaxCombinedFrequency in AC
MihaZupan Jul 10, 2023
0685172
React to compiler changes to InlineArray
MihaZupan Jul 19, 2023
57f2c3b
Link the roslyn-analyzers issue
MihaZupan Jul 21, 2023
5f46a3a
Remove >>> workaround now that #86841 is merged
MihaZupan Jul 22, 2023
9ea4784
Improve comments around the Teddy implementation
MihaZupan Jul 22, 2023
73ca19e
Improve comments around StringSearchValuesHelper
MihaZupan Jul 23, 2023
e672f5f
Tweak RabinKarp comments
MihaZupan Jul 23, 2023
bcf6a8e
Improve comments around Aho-Corasick
MihaZupan Jul 23, 2023
de3dedd
Remove some SearchValues indirection
MihaZupan Jul 23, 2023
9d02edb
Remove AggressiveInlining in a few places
MihaZupan Jul 23, 2023
3411289
Improve comments around ThreeChars SearchValues
MihaZupan Jul 23, 2023
d4f63d6
Fix assert and code typo
MihaZupan Jul 23, 2023
5cb16fa
Reduce overhead for creating single-value SearchValues
MihaZupan Jul 24, 2023
e66d8d9
Reword TODO to 'Potential optimization'
MihaZupan Jul 24, 2023
36dc66b
Add a few more test cases for single values
MihaZupan Jul 24, 2023
d4eca96
Combine a-z and !Ascii checks
MihaZupan Jul 24, 2023
f457975
Add more asserts around read pointer manipulation
MihaZupan Jul 25, 2023
ba5a95b
Add a description of the overall Teddy algorithm
MihaZupan Jul 25, 2023
cc7710b
Improve the Teddy comment a bit more
MihaZupan Jul 26, 2023
d4e9557
Comment space alignment
MihaZupan Jul 26, 2023
efdd97a
Add a fallback implementation to deal with incomplete surrogate pairs
MihaZupan Jul 27, 2023
1a3e35d
More edge case tests
MihaZupan Jul 27, 2023
d1c69eb
Add some comments about the time complexity of different implementations
MihaZupan Jul 28, 2023
177cc7c
Fix NLS test
MihaZupan Jul 28, 2023
6c64e45
React to compiler and ref readonly changes
MihaZupan Aug 4, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions THIRD-PARTY-NOTICES.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -1270,3 +1270,30 @@ Licensed under the Apache License, Version 2.0.

Available at
https://github.com/SixLabors/ImageSharp/blob/f4f689ce67ecbcc35cebddba5aacb603e6d1068a/LICENSE

License for the Teddy multi-substring searching implementation
--------------------------------------

https://github.com/BurntSushi/aho-corasick

The MIT License (MIT)

Copyright (c) 2015 Andrew Gallant

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
4 changes: 4 additions & 0 deletions src/libraries/System.Memory/ref/System.Memory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ public static partial class MemoryExtensions
public static bool Contains(this System.ReadOnlySpan<char> span, System.ReadOnlySpan<char> value, System.StringComparison comparisonType) { throw null; }
public static bool Contains<T>(this System.ReadOnlySpan<T> span, T value) where T : System.IEquatable<T>? { throw null; }
public static bool Contains<T>(this System.Span<T> span, T value) where T : System.IEquatable<T>? { throw null; }
public static bool ContainsAny(this System.ReadOnlySpan<char> span, System.Buffers.SearchValues<string> values) { throw null; }
public static bool ContainsAny(this System.Span<char> span, System.Buffers.SearchValues<string> values) { throw null; }
public static bool ContainsAny<T>(this System.ReadOnlySpan<T> span, System.Buffers.SearchValues<T> values) where T : System.IEquatable<T>? { throw null; }
public static bool ContainsAny<T>(this System.ReadOnlySpan<T> span, System.ReadOnlySpan<T> values) where T : System.IEquatable<T>? { throw null; }
public static bool ContainsAny<T>(this System.ReadOnlySpan<T> span, T value0, T value1) where T : System.IEquatable<T>? { throw null; }
Expand Down Expand Up @@ -271,6 +273,8 @@ public static void CopyTo<T>(this T[]? source, System.Span<T> destination) { }
public static System.Text.SpanRuneEnumerator EnumerateRunes(this System.Span<char> span) { throw null; }
public static bool Equals(this System.ReadOnlySpan<char> span, System.ReadOnlySpan<char> other, System.StringComparison comparisonType) { throw null; }
public static int IndexOf(this System.ReadOnlySpan<char> span, System.ReadOnlySpan<char> value, System.StringComparison comparisonType) { throw null; }
public static int IndexOfAny(this System.ReadOnlySpan<char> span, System.Buffers.SearchValues<string> values) { throw null; }
public static int IndexOfAny(this System.Span<char> span, System.Buffers.SearchValues<string> values) { throw null; }
public static int IndexOfAny<T>(this System.ReadOnlySpan<T> span, System.Buffers.SearchValues<T> values) where T : System.IEquatable<T>? { throw null; }
public static int IndexOfAny<T>(this System.ReadOnlySpan<T> span, System.ReadOnlySpan<T> values) where T : System.IEquatable<T>? { throw null; }
public static int IndexOfAny<T>(this System.ReadOnlySpan<T> span, T value0, T value1) where T : System.IEquatable<T>? { throw null; }
Expand Down
519 changes: 519 additions & 0 deletions src/libraries/System.Memory/tests/Span/StringSearchValues.cs

Large diffs are not rendered by default.

13 changes: 5 additions & 8 deletions src/libraries/System.Memory/tests/System.Memory.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@
<Compile Include="MemoryMarshal\CreateSpan.cs" />
<Compile Include="MemoryMarshal\CreateReadOnlySpan.cs" />
<Compile Include="MemoryMarshal\CreateReadOnlySpanFromNullTerminated.cs" />
<Compile Include="$(CommonPath)..\tests\System\RealFormatterTestsBase.cs"
Link="ParsersAndFormatters\Formatter\RealFormatterTestsBase.cs" />
<Compile Include="$(CommonPath)..\tests\System\RealFormatterTestsBase.cs" Link="ParsersAndFormatters\Formatter\RealFormatterTestsBase.cs" />
<Compile Include="ParsersAndFormatters\Formatter\RealFormatterTests.cs" />
<Compile Include="$(CommonPath)..\tests\System\RealParserTestsBase.cs"
Link="ParsersAndFormatters\Parser\RealParserTestsBase.cs" />
<Compile Include="$(CommonPath)..\tests\System\RealParserTestsBase.cs" Link="ParsersAndFormatters\Parser\RealParserTestsBase.cs" />
<Compile Include="ParsersAndFormatters\Parser\RealParserTests.cs" />
<Compile Include="ReadOnlySpan\Contains.byte.cs" />
<Compile Include="ReadOnlySpan\Contains.T.cs" />
<Compile Include="Span\StringSearchValues.cs" />
<Compile Include="Span\Reflection.cs" />
<Compile Include="SequenceReader\Advance.cs" />
<Compile Include="SequenceReader\BasicTests.cs" />
Expand Down Expand Up @@ -276,9 +275,7 @@
<Compile Include="Base64\Base64ValidationUnitTests.cs" />
</ItemGroup>
<ItemGroup>
<Compile Include="$(CommonTestPath)System\Buffers\NativeMemoryManager.cs"
Link="Common\System\Buffers\NativeMemoryManager.cs" />
<Compile Include="$(CommonPath)System\MutableDecimal.cs"
Link="Common\System\MutableDecimal.cs" />
<Compile Include="$(CommonTestPath)System\Buffers\NativeMemoryManager.cs" Link="Common\System\Buffers\NativeMemoryManager.cs" />
<Compile Include="$(CommonPath)System\MutableDecimal.cs" Link="Common\System\MutableDecimal.cs" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -4244,4 +4244,7 @@
<data name="OutOfMemory_StringTooLong" xml:space="preserve">
<value>String length exceeded supported range.</value>
</data>
<data name="Argument_SearchValues_UnsupportedStringComparison" xml:space="preserve">
<value>SearchValues&lt;string&gt; supports only StringComparison.Ordinal and StringComparison.OrdinalIgnoreCase.</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,27 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValuesDebugView.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\EmptySearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticMap.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\AhoCorasick.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\AhoCorasickBuilder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\AhoCorasickNode.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\CharacterFrequencyHelper.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\EightPackedReferences.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\RabinKarp.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\StringSearchValuesHelper.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\TeddyBucketizer.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\TeddyHelper.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyBucketizedN2.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyBucketizedN3.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyNonBucketizedN2.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyNonBucketizedN3.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyBase.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\MultiStringIgnoreCaseSearchValuesFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\SingleStringSearchValuesThreeChars.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\SingleStringSearchValuesFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValuesBase.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValuesAhoCorasick.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValuesRabinKarp.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOutOfRangeException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\InsufficientExecutionStackException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\InsufficientMemoryException.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ internal static void ToUpper(char h, char l, out char hr, out char lr)
Debug.Assert(char.IsLowSurrogate(l));

UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(CharUnicodeInfo.ToUpper(UnicodeUtility.GetScalarFromUtf16SurrogatePair(h, l)), out hr, out lr);

Debug.Assert(char.IsHighSurrogate(hr));
Debug.Assert(char.IsLowSurrogate(lr));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand All @@ -25,6 +28,9 @@ internal static void ToLower(char h, char l, out char hr, out char lr)
Debug.Assert(char.IsLowSurrogate(l));

UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(CharUnicodeInfo.ToLower(UnicodeUtility.GetScalarFromUtf16SurrogatePair(h, l)), out hr, out lr);

Debug.Assert(char.IsHighSurrogate(hr));
Debug.Assert(char.IsLowSurrogate(lr));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,24 @@ private unsafe char ChangeCase(char c, bool toUpper)
return dst;
}

/// <summary>
/// Converts a single UTF-16 code unit to upper case for ordinal comparisons,
/// dispatching on the active globalization mode.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <returns>The result of the casing routine selected for the current mode.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static char ToUpperOrdinal(char c)
{
    // Invariant globalization mode is checked first and uses its own casing routine.
    if (GlobalizationMode.Invariant)
    {
        return InvariantModeCasing.ToUpper(c);
    }

    // When NLS is not in use, defer to the ordinal casing routine.
    if (!GlobalizationMode.UseNls)
    {
        return OrdinalCasing.ToUpper(c);
    }

    // NLS: ASCII characters go through the ASCII-only helper; everything else
    // is routed through the invariant TextInfo instance.
    if (char.IsAscii(c))
    {
        return ToUpperAsciiInvariant(c);
    }

    return Invariant.ChangeCase(c, toUpper: true);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
{
Expand Down Expand Up @@ -436,7 +454,7 @@ public string ToUpper(string str)
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static char ToUpperAsciiInvariant(char c)
internal static char ToUpperAsciiInvariant(char c)
{
if (char.IsAsciiLetterLower(c))
{
Expand Down
Loading