Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework SnakeCase/KebabCase naming policies to closer match Json.NET's #90316

Merged
merged 12 commits into from
Aug 11, 2023
236 changes: 124 additions & 112 deletions src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;

namespace System.Text.Json
{
Expand All @@ -11,8 +13,13 @@ internal abstract class JsonSeparatorNamingPolicy : JsonNamingPolicy
private readonly bool _lowercase;
private readonly char _separator;

internal JsonSeparatorNamingPolicy(bool lowercase, char separator) =>
(_lowercase, _separator) = (lowercase, separator);
internal JsonSeparatorNamingPolicy(bool lowercase, char separator)
{
Debug.Assert(char.IsPunctuation(separator));

_lowercase = lowercase;
_separator = separator;
}

public sealed override string ConvertName(string name)
{
Expand All @@ -21,149 +28,154 @@ public sealed override string ConvertName(string name)
ThrowHelper.ThrowArgumentNullException(nameof(name));
}

// Rented buffer 20% longer than the input.
int rentedBufferLength = (12 * name.Length) / 10;
char[]? rentedBuffer = rentedBufferLength > JsonConstants.StackallocCharThreshold
? ArrayPool<char>.Shared.Rent(rentedBufferLength)
: null;

int resultUsedLength = 0;
Span<char> result = rentedBuffer is null
? stackalloc char[JsonConstants.StackallocCharThreshold]
: rentedBuffer;
return ConvertNameCore(_separator, _lowercase, name);
}

void ExpandBuffer(ref Span<char> result)
{
char[] newBuffer = ArrayPool<char>.Shared.Rent(result.Length * 2);
private static string ConvertNameCore(char separator, bool lowercase, string name)
{
Debug.Assert(name != null);

result.CopyTo(newBuffer);
char[]? rentedBuffer = null;

if (rentedBuffer is not null)
{
result.Slice(0, resultUsedLength).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}
// While we can't predict the expansion factor of the resultant string,
// start with a buffer that is at least 20% larger than the input.
int initialBufferLength = (int)(1.2 * name.Length);
Span<char> destination = initialBufferLength <= JsonConstants.StackallocCharThreshold
? stackalloc char[JsonConstants.StackallocCharThreshold]
: (rentedBuffer = ArrayPool<char>.Shared.Rent(initialBufferLength));

rentedBuffer = newBuffer;
result = rentedBuffer;
}
ReadOnlySpan<char> chars = name.AsSpan();
SeparatorState state = SeparatorState.NotStarted;
int charsWritten = 0;

void WriteWord(ReadOnlySpan<char> word, ref Span<char> result)
for (int i = 0; i < chars.Length; i++)
{
if (word.IsEmpty)
{
return;
}
char current = chars[i];

int written;
while (true)
switch (char.GetUnicodeCategory(current))
{
var destinationOffset = resultUsedLength != 0
? resultUsedLength + 1
: resultUsedLength;
case UnicodeCategory.UppercaseLetter:

if (destinationOffset < result.Length)
{
Span<char> destination = result.Slice(destinationOffset);

written = _lowercase
? word.ToLowerInvariant(destination)
: word.ToUpperInvariant(destination);

if (written > 0)
switch (state)
{
break;
case SeparatorState.NotStarted:
break;

case SeparatorState.LowercaseLetterOrDigit:
case SeparatorState.SpaceSeparator:
// An uppercase letter following a sequence of lowercase letters, digits, or spaces
// denotes the start of a new grouping: emit a separator character.
WriteChar(separator, ref destination);
break;

case SeparatorState.UppercaseLetter:
// We are reading through a sequence of two or more uppercase letters.
// Uppercase letters are grouped together with the exception of the
// final letter, assuming it is followed by lowercase letters.
// For example, the value 'XMLReader' should render as 'xml_reader',
// however 'SHA512Hash' should render as 'sha512_hash' (the digits keep 'SHA512' as one group).
if (i + 1 < chars.Length && char.IsLower(chars[i + 1]))
{
WriteChar(separator, ref destination);
}
break;

default:
Debug.Fail($"Unexpected state {state}");
break;
}
}

ExpandBuffer(ref result);
}
if (lowercase)
{
current = char.ToLowerInvariant(current);
}

if (resultUsedLength != 0)
{
result[resultUsedLength] = _separator;
resultUsedLength += 1;
}
WriteChar(current, ref destination);
state = SeparatorState.UppercaseLetter;
break;

resultUsedLength += written;
}
case UnicodeCategory.LowercaseLetter:
case UnicodeCategory.DecimalDigitNumber:

int first = 0;
ReadOnlySpan<char> chars = name.AsSpan();
CharCategory previousCategory = CharCategory.Boundary;
if (state is SeparatorState.SpaceSeparator)
{
// Normalize preceding spaces to one separator.
WriteChar(separator, ref destination);
}

for (int index = 0; index < chars.Length; index++)
{
char current = chars[index];
UnicodeCategory currentCategoryUnicode = char.GetUnicodeCategory(current);
if (!lowercase)
{
current = char.ToUpperInvariant(current);
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
}

if (currentCategoryUnicode == UnicodeCategory.SpaceSeparator ||
currentCategoryUnicode >= UnicodeCategory.ConnectorPunctuation &&
currentCategoryUnicode <= UnicodeCategory.OtherPunctuation)
{
WriteWord(chars.Slice(first, index - first), ref result);
WriteChar(current, ref destination);
state = SeparatorState.LowercaseLetterOrDigit;
break;

previousCategory = CharCategory.Boundary;
first = index + 1;
case UnicodeCategory.SpaceSeparator:
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
// Space characters are trimmed from the start and end of the input string
// but are normalized to separator characters if between letters.
if (state != SeparatorState.NotStarted)
{
state = SeparatorState.SpaceSeparator;
}
break;

continue;
}
default:
// Non-alphanumeric characters (including the separator character itself)
// are written as-is to the output and reset the separator state.
// E.g. 'ABC???def' maps to 'abc???def' in snake_case.

if (index + 1 < chars.Length)
{
char next = chars[index + 1];
CharCategory currentCategory = currentCategoryUnicode switch
{
UnicodeCategory.LowercaseLetter => CharCategory.Lowercase,
UnicodeCategory.UppercaseLetter => CharCategory.Uppercase,
_ => previousCategory
};

if (currentCategory == CharCategory.Lowercase && char.IsUpper(next) ||
next == '_')
{
WriteWord(chars.Slice(first, index - first + 1), ref result);

previousCategory = CharCategory.Boundary;
first = index + 1;

continue;
}

if (previousCategory == CharCategory.Uppercase &&
currentCategoryUnicode == UnicodeCategory.UppercaseLetter &&
char.IsLower(next))
{
WriteWord(chars.Slice(first, index - first), ref result);

previousCategory = CharCategory.Boundary;
first = index;

continue;
}

previousCategory = currentCategory;
WriteChar(current, ref destination);
state = SeparatorState.NotStarted;
break;
}
}

WriteWord(chars.Slice(first), ref result);

name = result.Slice(0, resultUsedLength).ToString();
name = destination.Slice(0, charsWritten).ToString();

if (rentedBuffer is not null)
{
result.Slice(0, resultUsedLength).Clear();
destination.Slice(0, charsWritten).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}

return name;

[MethodImpl(MethodImplOptions.AggressiveInlining)]
void WriteChar(char value, ref Span<char> destination)
{
if (charsWritten == destination.Length)
{
ExpandBuffer(ref destination);
}

destination[charsWritten++] = value;
}

void ExpandBuffer(ref Span<char> destination)
{
int newSize = checked(destination.Length * 2);
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
char[] newBuffer = ArrayPool<char>.Shared.Rent(newSize);
destination.CopyTo(newBuffer);

if (rentedBuffer is not null)
{
destination.Slice(0, charsWritten).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}

rentedBuffer = newBuffer;
destination = rentedBuffer;
}
}

private enum CharCategory
private enum SeparatorState
{
Boundary,
Lowercase,
Uppercase,
NotStarted,
UppercaseLetter,
LowercaseLetterOrDigit,
SpaceSeparator,
}
}
}
Loading