diff --git a/src/libraries/System.Globalization/tests/CultureInfo/CultureInfoCtor.cs b/src/libraries/System.Globalization/tests/CultureInfo/CultureInfoCtor.cs index f53ed73cc7f067..329bf076d500c2 100644 --- a/src/libraries/System.Globalization/tests/CultureInfo/CultureInfoCtor.cs +++ b/src/libraries/System.Globalization/tests/CultureInfo/CultureInfoCtor.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using Microsoft.DotNet.RemoteExecutor; using System.Collections.Generic; using Xunit; @@ -434,13 +435,65 @@ public void TestCreationWithTemporaryLCID(int lcid) Assert.NotEqual(lcid, new CultureInfo(lcid).LCID); } - [InlineData("zh-TW-u-co-zhuyin")] - [InlineData("de-DE-u-co-phoneb")] - [InlineData("de-u-co-phonebk")] + [InlineData("zh-TW-u-co-zhuyin", "zh-TW", "zh-TW_zhuyin")] + [InlineData("de-DE-u-co-phonebk", "de-DE", "de-DE_phoneboo")] + [InlineData("de-DE-u-co-phonebk-u-xx", "de-DE-u-xx", "de-DE-u-xx_phoneboo")] + [InlineData("de-DE-u-xx-u-co-phonebk", "de-DE-u-xx-u-co-phonebk", "de-DE-u-xx-u-co-phonebk")] + [InlineData("de-DE-t-xx-u-co-phonebk", "de-DE-t-xx-u-co-phonebk", "de-DE-t-xx-u-co-phonebk_phoneboo")] + [InlineData("de-DE-u-co-phonebk-t-xx", "de-DE-t-xx", "de-DE-t-xx_phoneboo")] + [InlineData("de-DE-u-co-phonebk-t-xx-u-yy", "de-DE-t-xx-u-yy", "de-DE-t-xx-u-yy_phoneboo")] + [InlineData("de-DE", "de-DE", "de-DE")] [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] - public void TestCreationWithMangledSortName(string cultureName) + public void TestCreationWithMangledSortName(string cultureName, string expectedCultureName, string expectedSortName) + { + CultureInfo ci = CultureInfo.GetCultureInfo(cultureName); + + Assert.Equal(expectedCultureName, ci.Name); + Assert.Equal(expectedSortName, ci.CompareInfo.Name); + } + + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + public void TestNeutralCultureWithCollationName() { - Assert.True(CultureInfo.GetCultureInfo(cultureName).CompareInfo.Name.Equals(cultureName, StringComparison.OrdinalIgnoreCase)); + Assert.Throws(() => CultureInfo.GetCultureInfo("zh-u-co-zhuyin")); + Assert.Throws(() => CultureInfo.GetCultureInfo("de-u-co-phonebk")); + } + + [InlineData("xx-u-XX", "xx-u-xx")] + [InlineData("xx-u-XX-u-yy", "xx-u-xx-u-yy")] + [InlineData("xx-t-ja-JP", "xx-t-ja-jp")] + [InlineData("qps-plocm", "qps-PLOCM")] // ICU normalize this name to "qps--plocm" which we normalize it back to "qps-plocm" + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + public void TestCreationWithICUNormalizedNames(string cultureName, string expectedCultureName) + { + CultureInfo ci = CultureInfo.GetCultureInfo(cultureName); + Assert.Equal(expectedCultureName, ci.Name); + } + + private static bool SupportRemoteExecutionWithIcu => RemoteExecutor.IsSupported && PlatformDetection.IsIcuGlobalization; + + [InlineData("xx-u-XX")] + [InlineData("xx-u-XX-u-yy")] + [InlineData("xx-t-ja-JP")] + [InlineData("qps-plocm")] + [InlineData("zh-TW-u-co-zhuyin")] + [InlineData("de-DE-u-co-phonebk")] + [InlineData("de-DE-u-co-phonebk-u-xx")] + [InlineData("de-DE-u-xx-u-co-phonebk")] + [InlineData("de-DE-t-xx-u-co-phonebk")] + [InlineData("de-DE-u-co-phonebk-t-xx")] + [InlineData("de-DE-u-co-phonebk-t-xx-u-yy")] + [InlineData("de-DE")] + [ConditionalTheory(nameof(SupportRemoteExecutionWithIcu))] + public void TestWithResourceLookup(string cultureName) + { + RemoteExecutor.Invoke(name => { + CultureInfo.CurrentUICulture = CultureInfo.GetCultureInfo(name); + int Zero = 0; + + // This should go through the resource manager to get the localized exception message using the current UI culture + Assert.Throws(() => 1 / Zero); + }, cultureName).Dispose(); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs index aa8cb4391d480e..cfb829d617814e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs @@ -12,6 +12,88 @@ internal sealed partial class CultureData // ICU constants private const int ICU_ULOC_KEYWORD_AND_VALUES_CAPACITY = 100; // max size of keyword or value private const int ICU_ULOC_FULLNAME_CAPACITY = 157; // max size of locale name + private const int WINDOWS_MAX_COLLATION_NAME_LENGTH = 8; // max collation name length in the culture name + + /// + /// Process the locale name that ICU returns and convert it to the format that .NET expects. + /// + /// The locale name that ICU returns. + /// The extension part in the original culture name. + /// The index of the collation in the name. + /// + /// BCP 47 specifications allow for extensions in the locale name, following the format language-script-region-extensions-collation. However, + /// not all extensions supported by ICU are supported in .NET. In the locale name, extensions are separated from the rest of the name using '-u-' or '-t-'. + /// In .NET, only the collation extension is supported. If the name includes a collation extension, it will be prefixed with '-u-co-'. + /// For example, en-US-u-co-search would be converted to the ICU name en_US@collation=search, which would then be translated to the .NET name en-US_search. + /// All extensions in the ICU names start with @. When normalizing the name to the .NET format, we retain the extensions in the name to ensure differentiation + /// between names with extensions and those without. For example, we may have a name like en-US and en-US-u-xx. Although .NET doesn't support the extension xx, + /// we still include it in the name to distinguish it from the name without the extension. + /// + private static string NormalizeCultureName(string name, ReadOnlySpan extension, out int collationStart) + { + Debug.Assert(name is not null); + Debug.Assert(name.Length <= ICU_ULOC_FULLNAME_CAPACITY); + + collationStart = -1; + bool changed = false; + Span buffer = stackalloc char[ICU_ULOC_FULLNAME_CAPACITY]; + int bufferIndex = 0; + + for (int i = 0; i < name.Length && bufferIndex < ICU_ULOC_FULLNAME_CAPACITY; i++) + { + char c = name[i]; + if (c == '-' && i < name.Length - 1 && name[i + 1] == '-') + { + // ICU changes names like `qps_plocm` (one underscore) to `qps__plocm` (two underscores) + // The reason this occurs is because, while ICU canonicalizing, ulocimp_getCountry returns an empty string since the country code value is > 3 (rightly so). + // But append an extra '_' thinking that country code was in-fact appended (for the empty string value as well). + // Before processing, the name qps__plocm will be converted to its .NET name equivalent, which is qps--plocm. + changed = true; + buffer[bufferIndex++] = '-'; + i++; + } + else if (c == '@') + { + changed = true; + + if (!extension.IsEmpty && extension.TryCopyTo(buffer.Slice(bufferIndex))) + { + bufferIndex += extension.Length; + } + + int collationIndex = name.IndexOf("collation=", i + 1, StringComparison.Ordinal); + if (collationIndex > 0) + { + collationIndex += "collation=".Length; + + // format of the locale properties is @key=value;collation=collationName;key=value;key=value + int endOfCollation = name.IndexOf(';', collationIndex); + if (endOfCollation < 0) + { + endOfCollation = name.Length; + } + + int length = Math.Min(WINDOWS_MAX_COLLATION_NAME_LENGTH, endOfCollation - collationIndex); // Windows doesn't allow collation names longer than 8 characters + if (buffer.Length - bufferIndex >= length + 1) + { + collationStart = bufferIndex; + buffer[bufferIndex++] = '_'; + name.AsSpan(collationIndex, length).CopyTo(buffer.Slice(bufferIndex)); + bufferIndex += length; + } + } + + // done getting all parts can be supported in the .NET culture names. + break; + } + else + { + buffer[bufferIndex++] = name[i]; + } + } + + return changed ? new string(buffer.Slice(0, bufferIndex)) : name; + } /// /// This method uses the sRealName field (which is initialized by the constructor before this is called) to @@ -26,16 +108,15 @@ private bool InitIcuCultureDataCore() string realNameBuffer = _sRealName; // Basic validation - if (!IsValidCultureName(realNameBuffer, out var index)) + if (!IsValidCultureName(realNameBuffer, out var index, out int indexOfExtensions)) { return false; } // Replace _ (alternate sort) with @collation= for ICU - ReadOnlySpan alternateSortName = default; if (index > 0) { - alternateSortName = realNameBuffer.AsSpan(index + 1); + ReadOnlySpan alternateSortName = realNameBuffer.AsSpan(index + 1); realNameBuffer = string.Concat(realNameBuffer.AsSpan(0, index), ICU_COLLATION_KEYWORD, alternateSortName); } @@ -45,22 +126,9 @@ private bool InitIcuCultureDataCore() return false; // fail } - // Replace the ICU collation keyword with an _ Debug.Assert(_sWindowsName != null); - index = _sWindowsName.IndexOf(ICU_COLLATION_KEYWORD, StringComparison.Ordinal); - if (index >= 0) - { - // Use original culture name if alternateSortName is not set, which is possible even if the normalized - // culture name has "@collation=". - // "zh-TW-u-co-zhuyin" is a good example. The term "u-co-" means the following part will be the sort name - // and it will be treated in ICU as "zh-TW@collation=zhuyin". - _sName = alternateSortName.Length == 0 ? realNameBuffer : string.Concat(_sWindowsName.AsSpan(0, index), "_", alternateSortName); - } - else - { - _sName = _sWindowsName; - } - _sRealName = _sName; + + _sRealName = NormalizeCultureName(_sWindowsName, indexOfExtensions > 0 ? _sRealName.AsSpan(indexOfExtensions) : ReadOnlySpan.Empty, out int collationStart); _iLanguage = LCID; if (_iLanguage == 0) @@ -69,11 +137,15 @@ private bool InitIcuCultureDataCore() } _bNeutral = TwoLetterISOCountryName.Length == 0; _sSpecificCulture = _bNeutral ? IcuLocaleData.GetSpecificCultureName(_sRealName) : _sRealName; - // Remove the sort from sName unless custom culture - if (index > 0 && !_bNeutral && !IsCustomCultureId(_iLanguage)) + + if (_bNeutral && collationStart > 0) { - _sName = _sWindowsName.Substring(0, index); + return false; // neutral cultures cannot have collation } + + // Remove the sort from sName unless custom culture + _sName = collationStart < 0 ? _sRealName : _sRealName.Substring(0, collationStart); + return true; } @@ -367,7 +439,7 @@ private static CultureInfo[] IcuEnumCultures(CultureTypes types) } bool enumNeutrals = (types & CultureTypes.NeutralCultures) != 0; - bool enumSpecificss = (types & CultureTypes.SpecificCultures) != 0; + bool enumSpecifics = (types & CultureTypes.SpecificCultures) != 0; List list = new List(); if (enumNeutrals) @@ -382,7 +454,7 @@ private static CultureInfo[] IcuEnumCultures(CultureTypes types) if (index + length <= bufferLength) { CultureInfo ci = CultureInfo.GetCultureInfo(new string(chars, index, length)); - if ((enumNeutrals && ci.IsNeutralCulture) || (enumSpecificss && !ci.IsNeutralCulture)) + if ((enumNeutrals && ci.IsNeutralCulture) || (enumSpecifics && !ci.IsNeutralCulture)) { list.Add(ci); } @@ -416,10 +488,14 @@ private static string IcuGetConsoleFallbackName(string cultureName) /// * Disallow input that starts or ends with '-' or '_'. /// * Disallow input that has any combination of consecutive '-' or '_'. /// * Disallow input that has multiple '_'. + /// + /// The IsValidCultureName method also identifies the presence of any extensions in the name (such as -u- or -t-) and returns the index of the extension. + /// This is necessary because we need to append the extensions to the name when normalizing it to the .NET format. /// - private static bool IsValidCultureName(string subject, out int indexOfUnderscore) + private static bool IsValidCultureName(string subject, out int indexOfUnderscore, out int indexOfExtensions) { indexOfUnderscore = -1; + indexOfExtensions = -1; if (subject.Length == 0) return true; // Invariant Culture if (subject.Length == 1 || subject.Length > LocaleNameMaxLength) return false; @@ -444,6 +520,16 @@ private static bool IsValidCultureName(string subject, out int indexOfUnderscore seenUnderscore = true; indexOfUnderscore = i; } + else + { + if (indexOfExtensions < 0 && i < subject.Length - 2 && (subject[i + 1] is 'u' or 't') && subject[i + 2] == '-') // we have -u- or -t- which is an extension + { + if (subject[i + 1] == 't' || i >= subject.Length - 6 || subject[i + 3] != 'c' || subject[i + 4] != 'o' || subject[i + 5] != '-' ) // not -u-co- collation extension + { + indexOfExtensions = i; + } + } + } } else {