Skip to content

Commit

Permalink
Rework SnakeCase/KebabCase naming policies to closer match Json.NET's (
Browse files Browse the repository at this point in the history
…dotnet#90316)

* Rework JsonNamingPolicy.SnakeCase/KebabCase to match Json.NET semantics.

* Alternative implementation that handles digits properly but doesn't trim non-alphanumeric characters.

* Refactor to switch statement

* add non-ascii letter test case

* Address feedback

* Add support for surrogate pair capitalization

* Revert "Add support for surrogate pair capitalization"

This reverts commit c1c5d1c.

* Add surrogate pair unit tests

* Address more feedback

* Address feedback and add a few more test cases.

* Add more surrogate pair tests

* Add unpaired surrogate testing
  • Loading branch information
eiriktsarpalis authored Aug 11, 2023
1 parent 3aec142 commit c6db89c
Show file tree
Hide file tree
Showing 2 changed files with 468 additions and 321 deletions.
235 changes: 124 additions & 111 deletions src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;

namespace System.Text.Json
{
Expand All @@ -11,8 +13,13 @@ internal abstract class JsonSeparatorNamingPolicy : JsonNamingPolicy
private readonly bool _lowercase;
private readonly char _separator;

internal JsonSeparatorNamingPolicy(bool lowercase, char separator) =>
(_lowercase, _separator) = (lowercase, separator);
/// <summary>
/// Initializes the policy by storing the casing flag and the separator character
/// that <c>ConvertName</c> later forwards to <c>ConvertNameCore</c>.
/// </summary>
/// <param name="lowercase">Casing flag stored in <c>_lowercase</c>.</param>
/// <param name="separator">Separator character stored in <c>_separator</c>; asserted to be punctuation.</param>
internal JsonSeparatorNamingPolicy(bool lowercase, char separator)
{
// Assert that the separator is classified as punctuation by char.IsPunctuation.
Debug.Assert(char.IsPunctuation(separator));

_lowercase = lowercase;
_separator = separator;
}

public sealed override string ConvertName(string name)
{
Expand All @@ -21,149 +28,155 @@ public sealed override string ConvertName(string name)
ThrowHelper.ThrowArgumentNullException(nameof(name));
}

// Rented buffer 20% longer than the input.
int rentedBufferLength = (12 * name.Length) / 10;
char[]? rentedBuffer = rentedBufferLength > JsonConstants.StackallocCharThreshold
? ArrayPool<char>.Shared.Rent(rentedBufferLength)
: null;
return ConvertNameCore(_separator, _lowercase, name.AsSpan());
}

private static string ConvertNameCore(char separator, bool lowercase, ReadOnlySpan<char> chars)
{
char[]? rentedBuffer = null;

int resultUsedLength = 0;
Span<char> result = rentedBuffer is null
// While we can't predict the expansion factor of the resultant string,
// start with a buffer that is at least 20% larger than the input.
int initialBufferLength = (int)(1.2 * chars.Length);
Span<char> destination = initialBufferLength <= JsonConstants.StackallocCharThreshold
? stackalloc char[JsonConstants.StackallocCharThreshold]
: rentedBuffer;
: (rentedBuffer = ArrayPool<char>.Shared.Rent(initialBufferLength));

void ExpandBuffer(ref Span<char> result)
SeparatorState state = SeparatorState.NotStarted;
int charsWritten = 0;

for (int i = 0; i < chars.Length; i++)
{
char[] newBuffer = ArrayPool<char>.Shared.Rent(result.Length * 2);
// NB this implementation does not handle surrogate pair letters
// cf. https://github.com/dotnet/runtime/issues/90352

result.CopyTo(newBuffer);
char current = chars[i];
UnicodeCategory category = char.GetUnicodeCategory(current);

if (rentedBuffer is not null)
switch (category)
{
result.Slice(0, resultUsedLength).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}
case UnicodeCategory.UppercaseLetter:

rentedBuffer = newBuffer;
result = rentedBuffer;
}
switch (state)
{
case SeparatorState.NotStarted:
break;

case SeparatorState.LowercaseLetterOrDigit:
case SeparatorState.SpaceSeparator:
// An uppercase letter following a sequence of lowercase letters, digits, or spaces
// denotes the start of a new grouping: emit a separator character.
WriteChar(separator, ref destination);
break;

case SeparatorState.UppercaseLetter:
// We are reading through a sequence of two or more uppercase letters.
// Uppercase letters are grouped together with the exception of the
// final letter, assuming it is followed by lowercase letters.
// For example, in snake_case the value 'XMLReader' should render as 'xml_reader',
// however 'SHA512Hash' should render as 'sha512_hash'.
if (i + 1 < chars.Length && char.IsLower(chars[i + 1]))
{
WriteChar(separator, ref destination);
}
break;

default:
Debug.Fail($"Unexpected state {state}");
break;
}

void WriteWord(ReadOnlySpan<char> word, ref Span<char> result)
{
if (word.IsEmpty)
{
return;
}
if (lowercase)
{
current = char.ToLowerInvariant(current);
}

int written;
while (true)
{
var destinationOffset = resultUsedLength != 0
? resultUsedLength + 1
: resultUsedLength;
WriteChar(current, ref destination);
state = SeparatorState.UppercaseLetter;
break;

if (destinationOffset < result.Length)
{
Span<char> destination = result.Slice(destinationOffset);
case UnicodeCategory.LowercaseLetter:
case UnicodeCategory.DecimalDigitNumber:

written = _lowercase
? word.ToLowerInvariant(destination)
: word.ToUpperInvariant(destination);
if (state is SeparatorState.SpaceSeparator)
{
// Normalize preceding spaces to one separator.
WriteChar(separator, ref destination);
}

if (written > 0)
if (!lowercase && category is UnicodeCategory.LowercaseLetter)
{
break;
current = char.ToUpperInvariant(current);
}
}

ExpandBuffer(ref result);
}
WriteChar(current, ref destination);
state = SeparatorState.LowercaseLetterOrDigit;
break;

if (resultUsedLength != 0)
{
result[resultUsedLength] = _separator;
resultUsedLength += 1;
}
case UnicodeCategory.SpaceSeparator:
// Space characters are trimmed from the start and end of the input string
// but are normalized to separator characters if between letters.
if (state != SeparatorState.NotStarted)
{
state = SeparatorState.SpaceSeparator;
}
break;

resultUsedLength += written;
default:
// Non-alphanumeric characters (including the separator character and surrogates)
// are written as-is to the output and reset the separator state.
// E.g. 'ABC???def' maps to 'abc???def' in snake_case.

WriteChar(current, ref destination);
state = SeparatorState.NotStarted;
break;
}
}

int first = 0;
ReadOnlySpan<char> chars = name.AsSpan();
CharCategory previousCategory = CharCategory.Boundary;
string result = destination.Slice(0, charsWritten).ToString();

for (int index = 0; index < chars.Length; index++)
if (rentedBuffer is not null)
{
char current = chars[index];
UnicodeCategory currentCategoryUnicode = char.GetUnicodeCategory(current);

if (currentCategoryUnicode == UnicodeCategory.SpaceSeparator ||
currentCategoryUnicode >= UnicodeCategory.ConnectorPunctuation &&
currentCategoryUnicode <= UnicodeCategory.OtherPunctuation)
{
WriteWord(chars.Slice(first, index - first), ref result);

previousCategory = CharCategory.Boundary;
first = index + 1;
destination.Slice(0, charsWritten).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}

continue;
}
return result;

if (index + 1 < chars.Length)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void WriteChar(char value, ref Span<char> destination)
{
if (charsWritten == destination.Length)
{
char next = chars[index + 1];
CharCategory currentCategory = currentCategoryUnicode switch
{
UnicodeCategory.LowercaseLetter => CharCategory.Lowercase,
UnicodeCategory.UppercaseLetter => CharCategory.Uppercase,
_ => previousCategory
};

if (currentCategory == CharCategory.Lowercase && char.IsUpper(next) ||
next == '_')
{
WriteWord(chars.Slice(first, index - first + 1), ref result);

previousCategory = CharCategory.Boundary;
first = index + 1;

continue;
}

if (previousCategory == CharCategory.Uppercase &&
currentCategoryUnicode == UnicodeCategory.UppercaseLetter &&
char.IsLower(next))
{
WriteWord(chars.Slice(first, index - first), ref result);

previousCategory = CharCategory.Boundary;
first = index;

continue;
}

previousCategory = currentCategory;
ExpandBuffer(ref destination);
}

destination[charsWritten++] = value;
}

WriteWord(chars.Slice(first), ref result);
void ExpandBuffer(ref Span<char> destination)
{
int newSize = checked(destination.Length * 2);
char[] newBuffer = ArrayPool<char>.Shared.Rent(newSize);
destination.CopyTo(newBuffer);

name = result.Slice(0, resultUsedLength).ToString();
if (rentedBuffer is not null)
{
destination.Slice(0, charsWritten).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}

if (rentedBuffer is not null)
{
result.Slice(0, resultUsedLength).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
rentedBuffer = newBuffer;
destination = rentedBuffer;
}

return name;
}

private enum CharCategory
private enum SeparatorState
{
Boundary,
Lowercase,
Uppercase,
NotStarted,
UppercaseLetter,
LowercaseLetterOrDigit,
SpaceSeparator,
}
}
}
Loading

0 comments on commit c6db89c

Please sign in to comment.