Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework SnakeCase/KebabCase naming policies to closer match Json.NET's #90316

Merged
merged 12 commits into from
Aug 11, 2023
233 changes: 121 additions & 112 deletions src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;

namespace System.Text.Json
{
Expand All @@ -11,8 +13,13 @@ internal abstract class JsonSeparatorNamingPolicy : JsonNamingPolicy
private readonly bool _lowercase;
private readonly char _separator;

internal JsonSeparatorNamingPolicy(bool lowercase, char separator) =>
(_lowercase, _separator) = (lowercase, separator);
/// <summary>
/// Initializes the naming policy with the casing mode and the separator character
/// that <see cref="ConvertName(string)"/> forwards to the conversion routine.
/// </summary>
/// <param name="lowercase">
/// When <see langword="true"/>, uppercase letters are converted with
/// <see cref="char.ToLowerInvariant(char)"/>; when <see langword="false"/>, lowercase letters
/// and digits are passed through <see cref="char.ToUpperInvariant(char)"/>.
/// </param>
/// <param name="separator">The character emitted between word groupings.</param>
internal JsonSeparatorNamingPolicy(bool lowercase, char separator)
{
// Debug-only sanity check: the separator is expected to be a punctuation character.
Debug.Assert(char.IsPunctuation(separator));

_lowercase = lowercase;
_separator = separator;
}

public sealed override string ConvertName(string name)
{
Expand All @@ -21,149 +28,151 @@ public sealed override string ConvertName(string name)
ThrowHelper.ThrowArgumentNullException(nameof(name));
}

// Rented buffer 20% longer than the input.
int rentedBufferLength = (12 * name.Length) / 10;
char[]? rentedBuffer = rentedBufferLength > JsonConstants.StackallocCharThreshold
? ArrayPool<char>.Shared.Rent(rentedBufferLength)
: null;
return ConvertNameCore(_separator, _lowercase, name.AsSpan());
}

int resultUsedLength = 0;
Span<char> result = rentedBuffer is null
private static string ConvertNameCore(char separator, bool lowercase, ReadOnlySpan<char> chars)
{
char[]? rentedBuffer = null;

// While we can't predict the expansion factor of the resultant string,
// start with a buffer that is at least 20% larger than the input.
int initialBufferLength = (int)(1.2 * chars.Length);
Span<char> destination = initialBufferLength <= JsonConstants.StackallocCharThreshold
? stackalloc char[JsonConstants.StackallocCharThreshold]
: rentedBuffer;
: (rentedBuffer = ArrayPool<char>.Shared.Rent(initialBufferLength));

void ExpandBuffer(ref Span<char> result)
{
char[] newBuffer = ArrayPool<char>.Shared.Rent(result.Length * 2);
SeparatorState state = SeparatorState.NotStarted;
int charsWritten = 0;

result.CopyTo(newBuffer);
for (int i = 0; i < chars.Length; i++)
{
char current = chars[i];

if (rentedBuffer is not null)
switch (char.GetUnicodeCategory(current))
{
result.Slice(0, resultUsedLength).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}
case UnicodeCategory.UppercaseLetter:

rentedBuffer = newBuffer;
result = rentedBuffer;
}
switch (state)
{
case SeparatorState.NotStarted:
break;

case SeparatorState.LowercaseLetterOrDigit:
case SeparatorState.SpaceSeparator:
// An uppercase letter following a sequence of lowercase letters or spaces
// denotes the start of a new grouping: emit a separator character.
WriteChar(separator, ref destination);
break;

case SeparatorState.UppercaseLetter:
// We are reading through a sequence of two or more uppercase letters.
// Uppercase letters are grouped together with the exception of the
// final letter, assuming it is followed by lowercase letters.
// For example, in snake_case the value 'XMLReader' should render as 'xml_reader',
// however 'SHA512Hash' should render as 'sha512_hash'.
if (i + 1 < chars.Length && char.IsLower(chars[i + 1]))
{
WriteChar(separator, ref destination);
}
break;

default:
Debug.Fail($"Unexpected state {state}");
break;
}

void WriteWord(ReadOnlySpan<char> word, ref Span<char> result)
{
if (word.IsEmpty)
{
return;
}
if (lowercase)
{
current = char.ToLowerInvariant(current);
}

int written;
while (true)
{
var destinationOffset = resultUsedLength != 0
? resultUsedLength + 1
: resultUsedLength;
WriteChar(current, ref destination);
state = SeparatorState.UppercaseLetter;
break;

if (destinationOffset < result.Length)
{
Span<char> destination = result.Slice(destinationOffset);
case UnicodeCategory.LowercaseLetter:
case UnicodeCategory.DecimalDigitNumber:

written = _lowercase
? word.ToLowerInvariant(destination)
: word.ToUpperInvariant(destination);
if (state is SeparatorState.SpaceSeparator)
{
// Normalize preceding spaces to one separator.
WriteChar(separator, ref destination);
}

if (written > 0)
if (!lowercase)
{
break;
current = char.ToUpperInvariant(current);
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
}
}

ExpandBuffer(ref result);
}
WriteChar(current, ref destination);
state = SeparatorState.LowercaseLetterOrDigit;
break;

if (resultUsedLength != 0)
{
result[resultUsedLength] = _separator;
resultUsedLength += 1;
}
case UnicodeCategory.SpaceSeparator:
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
// Space characters are trimmed from the start and end of the input string
// but are normalized to separator characters if between letters.
if (state != SeparatorState.NotStarted)
{
state = SeparatorState.SpaceSeparator;
}
break;

resultUsedLength += written;
default:
// Non-alphanumeric characters (including the separator character itself)
// are written as-is to the output and reset the separator state.
// E.g. 'ABC???def' maps to 'abc???def' in snake_case.

WriteChar(current, ref destination);
state = SeparatorState.NotStarted;
break;
}
}

int first = 0;
ReadOnlySpan<char> chars = name.AsSpan();
CharCategory previousCategory = CharCategory.Boundary;
string result = destination.Slice(0, charsWritten).ToString();

for (int index = 0; index < chars.Length; index++)
if (rentedBuffer is not null)
{
char current = chars[index];
UnicodeCategory currentCategoryUnicode = char.GetUnicodeCategory(current);

if (currentCategoryUnicode == UnicodeCategory.SpaceSeparator ||
currentCategoryUnicode >= UnicodeCategory.ConnectorPunctuation &&
currentCategoryUnicode <= UnicodeCategory.OtherPunctuation)
{
WriteWord(chars.Slice(first, index - first), ref result);

previousCategory = CharCategory.Boundary;
first = index + 1;
destination.Slice(0, charsWritten).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}

continue;
}
return result;

if (index + 1 < chars.Length)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void WriteChar(char value, ref Span<char> destination)
{
if (charsWritten == destination.Length)
{
char next = chars[index + 1];
CharCategory currentCategory = currentCategoryUnicode switch
{
UnicodeCategory.LowercaseLetter => CharCategory.Lowercase,
UnicodeCategory.UppercaseLetter => CharCategory.Uppercase,
_ => previousCategory
};

if (currentCategory == CharCategory.Lowercase && char.IsUpper(next) ||
next == '_')
{
WriteWord(chars.Slice(first, index - first + 1), ref result);

previousCategory = CharCategory.Boundary;
first = index + 1;

continue;
}

if (previousCategory == CharCategory.Uppercase &&
currentCategoryUnicode == UnicodeCategory.UppercaseLetter &&
char.IsLower(next))
{
WriteWord(chars.Slice(first, index - first), ref result);

previousCategory = CharCategory.Boundary;
first = index;

continue;
}

previousCategory = currentCategory;
ExpandBuffer(ref destination);
}

destination[charsWritten++] = value;
}

WriteWord(chars.Slice(first), ref result);
void ExpandBuffer(ref Span<char> destination)
{
int newSize = checked(destination.Length * 2);
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
char[] newBuffer = ArrayPool<char>.Shared.Rent(newSize);
destination.CopyTo(newBuffer);

name = result.Slice(0, resultUsedLength).ToString();
if (rentedBuffer is not null)
{
destination.Slice(0, charsWritten).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
}

if (rentedBuffer is not null)
{
result.Slice(0, resultUsedLength).Clear();
ArrayPool<char>.Shared.Return(rentedBuffer);
rentedBuffer = newBuffer;
destination = rentedBuffer;
}

return name;
}

private enum CharCategory
private enum SeparatorState
{
Boundary,
Lowercase,
Uppercase,
NotStarted,
UppercaseLetter,
LowercaseLetterOrDigit,
SpaceSeparator,
}
}
}
Loading