-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* [unicode] The `length()` function learned to correctly handle Unicode surrogate pairs and composite characters. * [unicode] The `sort()` function learned to correctly handle Unicode surrogate pairs and composite characters. * [unicode] The `sort_by()` function learned to correctly handle Unicode surrogate pairs and composite characters. * [unicode] The function `reverse()` learned to correctly handle Unicode surrogate pairs and composite characters. * [unicode] Updated compliance tests. * [unicode] Fixed `length()` function.
- Loading branch information
1 parent
30f6e4e
commit 2b9fbf5
Showing
12 changed files
with
490 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
using System.Runtime.CompilerServices; | ||
|
||
// Grants the "jmespathnet.tests" assembly access to the internal members of this assembly. | ||
// DEBUG and non-DEBUG builds each name a different public key for that test assembly. | ||
#if DEBUG | ||
[assembly: InternalsVisibleTo("jmespathnet.tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100055796df0ae0f975fabb3455d92c9edfef1e266fe66273a7f42c298406335fef71fdf99f46033f5f1e890fa2c6a5f230bfdd5832aa16eb45af02ad70ff716f97a51ff955abaaa2490da59ece7f2474dd43695c6bc8f1c82d1bb38f166fdfa7716e11291bda347bc8689d5435e68401a9ab5b4e8e49c1074173d21edf4fbda1b1")] | ||
#else | ||
[assembly: InternalsVisibleTo("jmespathnet.tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010055709b8bb177721db5eb5a9e7437bfa5f46251aef5dcf91f4a36a7dcb98e51a8ecf5a37284004fa6694f3471f5dfc82244c9672eb085cd65c7cb75d8251aa971a349d4641b492ca0963b74fd9878a5872d6ccbb7b7ceff82aa3687c240a70b4d5565c7cff5df0a12cdbde58e937320fb302b7ccedff72008f3bec0bee8384dc5")] | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
using System.Globalization; | ||
|
||
namespace DevLab.JmesPath.Utils
{
    /// <summary>
    /// An <see cref="IEnumerator{T}" /> of <see cref="int" /> that enumerates
    /// the Unicode code points of a string, combining each UTF-16 surrogate
    /// pair into a single code point.
    /// </summary>
    internal sealed class CodePointEnumerator : IEnumerator<int>
    {
        // Code points decoded once, up front, from the text given to the constructor.
        private readonly int[] codePoints_;

        // Position of the current code point; -1 means "before the first element".
        private int index_ = -1;

        /// <summary>
        /// Initialize a new instance of the <see cref="CodePointEnumerator" /> class.
        /// </summary>
        /// <param name="text">The text whose code points are enumerated.</param>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public CodePointEnumerator(string text)
        {
            codePoints_ = GetCodePoints(text);
        }

        /// <summary>
        /// Exposes the remaining code points as a lazy sequence.
        /// Enumerating the sequence advances this enumerator.
        /// </summary>
        /// <returns>The code points from the current position to the end.</returns>
        public IEnumerable<int> AsEnumerable()
        {
            while (MoveNext())
                yield return Current;
        }

        /// <summary>
        /// The code point at the current position. Only valid after
        /// <see cref="MoveNext" /> has returned <c>true</c>.
        /// </summary>
        public int Current
            => codePoints_[index_];

        object IEnumerator.Current
            => Current;

        /// <summary>
        /// Advances to the next code point.
        /// </summary>
        /// <returns><c>true</c> if a code point is available; <c>false</c> once the end is passed.</returns>
        public bool MoveNext()
        {
            // Stop advancing at the end so repeated calls cannot grow the index without bound.
            if (index_ < codePoints_.Length)
                index_++;

            return index_ < codePoints_.Length;
        }

        /// <summary>
        /// Moves the enumerator back to its initial position, before the first code point.
        /// </summary>
        public void Reset()
        {
            index_ = -1;
        }

        /// <summary>
        /// Nothing to release; present to satisfy <see cref="IDisposable" />.
        /// </summary>
        public void Dispose() { }

        /// <summary>
        /// Decodes a UTF-16 string into its sequence of Unicode code points.
        /// </summary>
        /// <param name="text">The text to decode.</param>
        /// <returns>
        /// One entry per code point, in order. A valid surrogate pair yields a single
        /// supplementary-plane code point; any other UTF-16 code unit (including an
        /// unpaired surrogate) is returned as-is.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        internal static int[] GetCodePoints(string text)
        {
            if (text == null)
                throw new ArgumentNullException(nameof(text));

            var codePoints = new List<int>(text.Length);

            // Scan UTF-16 code units directly instead of grapheme clusters: a cluster
            // may hold several code points (e.g. a surrogate pair followed by combining
            // marks) and each of them must be reported individually.
            for (var i = 0; i < text.Length; i++)
            {
                if (char.IsSurrogatePair(text, i))
                {
                    codePoints.Add(char.ConvertToUtf32(text, i));
                    i++; // the low surrogate has been consumed as part of the pair
                }
                else
                {
                    codePoints.Add(text[i]);
                }
            }

            return codePoints.ToArray();
        }
    }
}
Oops, something went wrong.