Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/unicode surrogate pairs #75

Merged
merged 6 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/jmespath.net/Functions/LengthFunction.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public override JToken Execute(params JmesPathFunctionArgument[] args)
switch (token.GetTokenType())
{
case "string":
return token.Value<String>().Length;
return ((Text)token.Value<String>()).Length;
case "array":
return ((JArray) token).Count;
case "object":
Expand Down
15 changes: 8 additions & 7 deletions src/jmespath.net/Functions/ReverseFunction.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,16 @@ public override JToken Execute(params JmesPathFunctionArgument[] args)
switch (token.GetTokenType())
{
case "string":
{
var characters = token.Value<String>().Reverse().ToArray();
return new JValue(new string(characters));
}
{
var text = (Text)token.Value<String>();
var reversed = new Text(text.CodePoints.Reverse().ToArray());
return new JValue((string)reversed);
}
case "array":
{
{
var items = ((JArray)token).Reverse();
return new JArray().AddRange(items);
}
return new JArray().AddRange(items);
}
default:
return null;
}
Expand Down
84 changes: 64 additions & 20 deletions src/jmespath.net/Functions/SortByFunction.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using DevLab.JmesPath.Expressions;
using DevLab.JmesPath.Utils;
using Newtonsoft.Json.Linq;

Expand All @@ -21,35 +23,77 @@ public override JToken Execute(params JmesPathFunctionArgument[] args)
var array = (JArray)args[0].Token;
var expression = args[1].Expression;

var done = false;
if (array.Count == 0)
return new JArray();

var expectedItemType = "none";
// make sure this is an homogeneous array
// with all items from a single expected type

var ordered = array.OrderBy(u =>
{
var e = expression.Transform(u);
var keyCollection = array
.Select(u => expression.Transform(u).AsJToken())
.ToArray()
;

var actualItemType = keyCollection[0].GetTokenType();
if (actualItemType != "number" && actualItemType != "string")
throw new Exception($"Error: invalid-type, the expression argument of function {Name} should return a number or a string.");

if (keyCollection.Any(k => k.GetTokenType() != actualItemType))
throw new Exception($"Error: invalid-type, all items resulting from the evaluation of the expression argument of function {Name} should have the same type.");

var actualItemType = e.AsJToken().GetTokenType();
// sort array

if (!done)
{
if (actualItemType != "number" && actualItemType != "string")
throw new Exception($"Error: invalid-type, the expression argument of function {Name} should return a number or a string.");
var tokens = array.AsEnumerable().ToArray();
JToken[] ordered = tokens;

expectedItemType = actualItemType;
done = true;
}
if (actualItemType == "number")
{
var actualKeyTokenType = keyCollection[0].Type;
if (actualKeyTokenType == JTokenType.Float)
ordered = SortByNumbers<double>(tokens, expression);
else if (actualKeyTokenType == JTokenType.Integer)
ordered = SortByNumbers<int>(tokens, expression);
}
else
{
ordered = SortByText(tokens, expression);
}

if (expectedItemType != actualItemType)
throw new Exception("Error: invalid-type, all items resulting from the evaluation of the expression argument of function {Name} should have the same type.");

return e.AsJToken();
return new JArray(ordered);
}

}).ToArray();
/// <summary>
/// Orders <paramref name="array"/> by a numeric key of type <typeparamref name="T"/>.
/// The key of each item is obtained by applying <paramref name="expression"/> to the item
/// and reading the resulting token as a <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Numeric type the key tokens are read as.</typeparam>
/// <param name="array">The tokens to sort.</param>
/// <param name="expression">Expression evaluated against each token to produce its sort key.</param>
/// <returns>A new array holding the tokens ordered by their key.</returns>
private JToken[] SortByNumbers<T>(JToken[] array, JmesPathExpression expression)
{
// Local key selector: evaluates the expression for one item and reads the result as T.
T keySelector(JToken t) {
var token = expression.Transform(t).AsJToken();
return token.Value<T>();
};

// NOTE(review): the next two lines use `ordered` before it is declared and have no
// terminating `;` - they look like removed-side lines of the diff; confirm before relying on them.
return new JArray()
.AddRange(ordered)
var ordered = array
.OrderBy(keySelector)
.ToArray()
;

return ordered;
}
/// <summary>
/// Orders <paramref name="array"/> by a textual key. The key of each item is obtained by
/// applying <paramref name="expression"/> to the item, reading the resulting token as a
/// string and converting it to a <see cref="Text"/>; keys are compared with
/// <see cref="Text.CodePointComparer"/>.
/// </summary>
/// <param name="array">The tokens to sort.</param>
/// <param name="expression">Expression evaluated against each token to produce its sort key.</param>
/// <returns>A new array holding the tokens ordered by their key.</returns>
private JToken[] SortByText(JToken[] array, JmesPathExpression expression)
{
    // Keys are compared using the comparer exposed by Text rather than the default string ordering.
    IComparer<Text> codePointOrder = Text.CodePointComparer;

    Func<JToken, Text> toSortKey = item =>
    {
        var evaluated = expression.Transform(item).AsJToken();
        return (Text)evaluated.Value<string>();
    };

    return array.OrderBy(toSortKey, codePointOrder).ToArray();
}
}
}
27 changes: 18 additions & 9 deletions src/jmespath.net/Functions/SortFunction.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Linq;
using DevLab.JmesPath.Utils;
using Newtonsoft.Json.Linq;
Expand Down Expand Up @@ -28,18 +29,26 @@ public override JToken Execute(params JmesPathFunctionArgument[] args)
var item = array[0];

if (item.Type == JTokenType.Float)
return new JArray().AddRange(Sort<double>(array));
return JArray.FromObject(SortNumber<double>(array));
else if (item.Type == JTokenType.Integer)
return new JArray().AddRange(Sort<int>(array));
return JArray.FromObject(SortNumber<int>(array));
else
return new JArray().AddRange(Sort<string>(array));
return JArray.FromObject(SortText(array));
}

/// <summary>
/// Orders the items of <paramref name="array"/> by their value read as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type each item is read as for comparison.</typeparam>
/// <param name="array">The array whose items are sorted.</param>
/// <returns>A new array holding the original tokens in sorted order.</returns>
private static JToken[] Sort<T>(JArray array)
{
    var sortedTokens = array.OrderBy(item => item.Value<T>());
    return sortedTokens.ToArray();
}
/// <summary>
/// Reads every item of <paramref name="array"/> as a <typeparamref name="T"/> and
/// returns those values in ascending order.
/// </summary>
/// <typeparam name="T">Numeric type the items are read as.</typeparam>
/// <param name="array">The array whose values are sorted.</param>
/// <returns>A new array of the converted values, sorted with the default comparer.</returns>
internal static T[] SortNumber<T>(JArray array)
{
    var numbers = array.Values<T>();
    return numbers.OrderBy(number => number).ToArray();
}

/// <summary>
/// Reads every item of <paramref name="array"/> as a string and returns those strings
/// ordered with <see cref="Text.CodePointComparer"/>.
/// </summary>
/// <param name="array">The array whose string values are sorted.</param>
/// <returns>A new array of the sorted strings.</returns>
internal static string[] SortText(JArray array)
{
    // Wrap each string in a Text so the comparer exposed by Text can be used for ordering.
    var texts = array.Select(item => (Text)item.Value<string>());
    var sorted = texts.OrderBy(text => text, Text.CodePointComparer);

    // Unwrap back to plain strings for the caller.
    return sorted.Select(text => (string)text).ToArray();
}
}
}
7 changes: 7 additions & 0 deletions src/jmespath.net/InternalsVisibleTo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
using System.Runtime.CompilerServices;

#if DEBUG
[assembly: InternalsVisibleTo("jmespathnet.tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100055796df0ae0f975fabb3455d92c9edfef1e266fe66273a7f42c298406335fef71fdf99f46033f5f1e890fa2c6a5f230bfdd5832aa16eb45af02ad70ff716f97a51ff955abaaa2490da59ece7f2474dd43695c6bc8f1c82d1bb38f166fdfa7716e11291bda347bc8689d5435e68401a9ab5b4e8e49c1074173d21edf4fbda1b1")]
#else
[assembly: InternalsVisibleTo("jmespathnet.tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010055709b8bb177721db5eb5a9e7437bfa5f46251aef5dcf91f4a36a7dcb98e51a8ecf5a37284004fa6694f3471f5dfc82244c9672eb085cd65c7cb75d8251aa971a349d4641b492ca0963b74fd9878a5872d6ccbb7b7ceff82aa3687c240a70b4d5565c7cff5df0a12cdbde58e937320fb302b7ccedff72008f3bec0bee8384dc5")]
#endif
79 changes: 79 additions & 0 deletions src/jmespath.net/Utils/CodePointEnumerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;

namespace DevLab.JmesPath.Utils
{
/// <summary>
/// An <see cref="IEnumerator{T}" /> of <see cref="int" /> that enumerates
/// the Unicode code points of a string.
/// </summary>
/// <remarks>
/// The code points are computed once, when the enumerator is constructed.
/// </remarks>
internal sealed class CodePointEnumerator : IEnumerator<int>
{
    private readonly int[] codePoints_;
    private int index_ = -1;

    /// <summary>
    /// Initialize a new instance of the <see cref="CodePointEnumerator" /> class.
    /// </summary>
    /// <param name="text">The text whose code points are enumerated.</param>
    public CodePointEnumerator(Text text)
    {
        codePoints_ = GetCodePoints(text);
    }

    /// <summary>
    /// Exposes the remaining code points as a lazy sequence.
    /// </summary>
    /// <remarks>
    /// The sequence advances this enumerator: it yields from the current
    /// position onwards and leaves the enumerator positioned past the end
    /// once fully consumed. Call <see cref="Reset" /> to iterate again.
    /// </remarks>
    /// <returns>The code points not yet consumed from this enumerator.</returns>
    public IEnumerable<int> AsEnumerable()
    {
        while (MoveNext())
            yield return Current;
    }

    /// <summary>
    /// The code point at the current position. Only valid after
    /// <see cref="MoveNext" /> has returned <c>true</c>.
    /// </summary>
    public int Current
        => codePoints_[index_];

    object IEnumerator.Current
        => Current;

    /// <summary>
    /// Advances to the next code point.
    /// </summary>
    /// <returns><c>true</c> if a code point is available; <c>false</c> once past the end.</returns>
    public bool MoveNext()
    {
        // Stop advancing once past the end so repeated calls cannot grow the index without bound.
        if (index_ < codePoints_.Length)
            ++index_;

        return index_ < codePoints_.Length;
    }

    /// <summary>
    /// Repositions the enumerator before the first code point.
    /// </summary>
    public void Reset()
    {
        index_ = -1;
    }

    /// <summary>
    /// Nothing to release; present to satisfy <see cref="IDisposable" />.
    /// </summary>
    public void Dispose() { }

    /// <summary>
    /// Decodes the UTF-16 content of <paramref name="text" /> into Unicode code points.
    /// </summary>
    /// <remarks>
    /// The string is scanned code unit by code unit instead of by text element.
    /// Text elements are grapheme clusters, which may hold several code points
    /// (for instance a base character followed by combining marks); decoding per
    /// text element would drop or mis-decode everything after the first code point.
    /// A valid surrogate pair yields one supplementary-plane code point; any other
    /// code unit, including an unpaired surrogate, is emitted with its own value.
    /// </remarks>
    /// <param name="text">The text to decode.</param>
    /// <returns>The code points of <paramref name="text" />, in order.</returns>
    /// <exception cref="ArgumentNullException">The text converts to a <c>null</c> string.</exception>
    internal static int[] GetCodePoints(Text text)
    {
        string value = text;
        if (value == null)
            throw new ArgumentNullException(nameof(text));

        // At most one code point per UTF-16 code unit.
        var codePoints = new List<int>(value.Length);

        for (var i = 0; i < value.Length; i++)
        {
            if (Char.IsSurrogatePair(value, i))
            {
                codePoints.Add(Char.ConvertToUtf32(value, i));

                // Skip the low surrogate that was just consumed.
                i++;
            }
            else
            {
                codePoints.Add(value[i]);
            }
        }

        return codePoints.ToArray();
    }
}
}
Loading