From 5c338724a91811b22f8e6f3738ff788119ebab5f Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 27 Jun 2023 12:04:08 -0700 Subject: [PATCH 01/13] First draft of locale displayname algorithm --- experimental/displaynames/src/displaynames.rs | 248 +++++++++++++----- experimental/displaynames/tests/tests.rs | 49 ++++ 2 files changed, 235 insertions(+), 62 deletions(-) create mode 100644 experimental/displaynames/tests/tests.rs diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index b1cde9182e6..882d6fb04d2 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -4,12 +4,22 @@ //! This module contains types and implementations for the Displaynames component. +use crate::alloc::string::ToString; use crate::options::*; use crate::provider::*; use alloc::borrow::Cow; +use alloc::string::String; use icu_locid::{subtags::Language, subtags::Region, subtags::Script, subtags::Variant, Locale}; use icu_provider::prelude::*; +use alloc::format; +use alloc::vec; +use alloc::vec::Vec; +use core::str::FromStr; +use tinystr::TinyAsciiStr; +use zerovec::ule::UnvalidatedStr; +use zerovec::ZeroMap; + /// Lookup of the locale-specific display names by region code. /// /// # Example @@ -330,11 +340,9 @@ impl LanguageDisplayNames { /// ) /// .expect("Data should load successfully"); /// -/// assert_eq!(display_name.of(&locale!("de-CH")), "Swiss High German"); -/// assert_eq!(display_name.of(&locale!("de")), "German"); -/// assert_eq!(display_name.of(&locale!("de-MX")), "German (Mexico)"); -/// assert_eq!(display_name.of(&locale!("xx-YY")), "xx (YY)"); -/// assert_eq!(display_name.of(&locale!("xx")), "xx"); +/// assert_eq!(display_name.of(&locale!("en-GB")), "British English"); +/// assert_eq!(display_name.of(&locale!("en")), "English"); +/// assert_eq!(display_name.of(&locale!("en-MX")), "English (Mexico)"); /// ``` pub struct LocaleDisplayNamesFormatter { options: DisplayNamesOptions, @@ -401,88 +409,204 @@ impl LocaleDisplayNamesFormatter { /// Returns the display name of a locale. // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { - // https://www.unicode.org/reports/tr35/tr35-general.html#Display_Name_Elements + // Step - 1: Construct the canonical locale from a given locale. + let cannonical_locale = Locale::canonicalize(locale.to_string()).unwrap(); + + let longest_prefix = + match find_longest_matching_prefix(&cannonical_locale, &self.locale_data.get().names) { + Some(prefix) => prefix, + None => cannonical_locale + .split_once("-") + .unwrap_or((&cannonical_locale, "")) + .0 + .to_string(), + }; + + // Step - 2: Construct a locale display name string (LDN). + let ldn = get_locale_display_name(&longest_prefix, &self); + + // Step - 3: Construct a vector of longest qualifying substrings (LQS). + let lqs = get_longest_qualifying_substrings(&cannonical_locale, &longest_prefix, &self); + + // Step - 4: Return the displayname based on the size of LQS. + if lqs.len() == 0 { + return ldn.to_string().into(); + } else { + return Cow::Owned(alloc::format!("{} ({})", ldn, lqs.join(", "))); + } + } +} + +/// For a given string and the local data, find the longest prefix of the string that exists as a key in the data. +/// Note: this function implements a naive algorithm to find the longest matching prefix and this can be improved by either using +/// Binary Search algorithm or by implementing an intermediate Trie structure if needed. +/// The time complexity for this algorithm is o(n): where n is the number of words separated by "-" in "s". +fn find_longest_matching_prefix<'data>( + s: &str, + data: &ZeroMap<'data, UnvalidatedStr, str>, +) -> Option { + let vector = s.split("-").collect::>(); + let mut end = vector.len(); + + while end > 0 { + let current_prefix = vector[0..end].join("-"); + + if data + .get(UnvalidatedStr::from_str(current_prefix.as_str())) + .is_some() + { + return Some(current_prefix); + } + end -= 1; + } + return None; +} - // TODO: This binary search needs to return the longest matching found prefix - // instead of just perfect matches - if let Some(displayname) = match self.options.style { - Some(Style::Short) => self - .locale_data +fn get_locale_display_name<'a>( + longest_prefix: &'a str, + locale_dn_formatter: &'a LocaleDisplayNamesFormatter, +) -> &'a str { + let LocaleDisplayNamesFormatter { + options, + locale_data, + language_data, + .. + } = locale_dn_formatter; + + // Check if the key exists in the locale_data first. + // Example: "en_GB", "nl_BE". + let mut ldn = match options.style { + Some(Style::Short) => locale_data + .get() + .short_names + .get(UnvalidatedStr::from_str(longest_prefix)), + Some(Style::Long) => locale_data + .get() + .long_names + .get(UnvalidatedStr::from_str(longest_prefix)), + Some(Style::Menu) => locale_data + .get() + .menu_names + .get(UnvalidatedStr::from_str(longest_prefix)), + _ => None, + } + .or_else(|| { + locale_data + .get() + .names + .get(UnvalidatedStr::from_str(longest_prefix)) + }); + + // At this point the key should exist in the language_data. + // Example: "en", "nl", "zh". + if ldn.is_none() { + let tinystr_prefix = TinyAsciiStr::from_str(&longest_prefix).unwrap(); + ldn = match options.style { + // If the key is not found in locale, then search for the + Some(Style::Short) => language_data .get() .short_names - .get_by(|bytes| locale.strict_cmp(bytes).reverse()), - Some(Style::Long) => self - .locale_data + .get(&tinystr_prefix.to_unvalidated()), + Some(Style::Long) => language_data .get() .long_names - .get_by(|bytes| locale.strict_cmp(bytes).reverse()), - Some(Style::Menu) => self - .locale_data + .get(&tinystr_prefix.to_unvalidated()), + Some(Style::Menu) => language_data .get() .menu_names - .get_by(|bytes| locale.strict_cmp(bytes).reverse()), + .get(&tinystr_prefix.to_unvalidated()), _ => None, } .or_else(|| { - self.locale_data + language_data .get() .names - .get_by(|bytes| locale.strict_cmp(bytes).reverse()) - }) { - return Cow::Borrowed(displayname); - } + .get(&tinystr_prefix.to_unvalidated()) + }); + } - // TODO: This is a dummy implementation which does not adhere to UTS35. It only uses - // the language and region code, and uses a hardcoded pattern to combine them. + // Throw an error if the LDN is none as it is not possible to have a locale string without language. + return ldn.expect("cannot parse locale displayname."); +} - let langdisplay = match self.options.style { - Some(Style::Short) => self - .language_data +fn get_longest_qualifying_substrings<'a>( + cannonical_locale: &'a str, + language_prefix: &'a str, + locale_dn_formatter: &'a LocaleDisplayNamesFormatter, +) -> Vec<&'a str> { + let LocaleDisplayNamesFormatter { + options, + region_data, + script_data, + variant_data, + .. + } = locale_dn_formatter; + let mut lqs: Vec<&str> = vec![]; + + // Examples of the "language_prefix" are: "en-GB", "en", "nl-BE", "nl". + // Based on the algorithm, the computation of LQS should omit locale/language prefix from the key. + // + // This step is required because locale!("en-GB-Latn") would return locale { id { language: "en", region: "GB", Script: "Latn" }, .. }. + // However, because "en-GB" is a dialect and was included as part of LDN, it should ideally be locale { id { language: "en-GB", script: "Latn" }, .. }. + // To resolve this case, the "language_prefix" used to compute LDN is removed first from the locale string. + let str = cannonical_locale.replace(language_prefix, ""); + + if str.len() == 0 { + return lqs; + } + + // Add "und" to the beginning to ensure that the locale string can be parsed correctly. + let locale_str_with_unknown_language = format!("und{}", &str); + let locale: Locale = Locale::from_str(&locale_str_with_unknown_language).expect("parsing locale failed"); + + if let Some(script) = locale.id.script { + let scriptdisplay = match options.style { + Some(Style::Short) => script_data .get() .short_names - .get(&locale.id.language.into_tinystr().to_unvalidated()), - Some(Style::Long) => self - .language_data + .get(&script.into_tinystr().to_unvalidated()), + _ => None, + } + .or_else(|| { + script_data .get() - .long_names - .get(&locale.id.language.into_tinystr().to_unvalidated()), - Some(Style::Menu) => self - .language_data + .names + .get(&script.into_tinystr().to_unvalidated()) + }); + if let Some(scriptdn) = scriptdisplay { + lqs.push(scriptdn); + } + } + + if let Some(region) = locale.id.region { + let regiondisplay = match options.style { + Some(Style::Short) => region_data .get() - .menu_names - .get(&locale.id.language.into_tinystr().to_unvalidated()), + .short_names + .get(®ion.into_tinystr().to_unvalidated()), _ => None, } .or_else(|| { - self.language_data + region_data .get() .names - .get(&locale.id.language.into_tinystr().to_unvalidated()) + .get(®ion.into_tinystr().to_unvalidated()) }); - if let Some(region) = locale.id.region { - let regiondisplay = match self.options.style { - Some(Style::Short) => self - .region_data - .get() - .short_names - .get(®ion.into_tinystr().to_unvalidated()), - _ => None, - } - .or_else(|| { - self.region_data - .get() - .names - .get(®ion.into_tinystr().to_unvalidated()) - }); - // TODO: Use data patterns - Cow::Owned(alloc::format!( - "{} ({})", - langdisplay.unwrap_or(locale.id.language.as_str()), - regiondisplay.unwrap_or(region.as_str()) - )) - } else { - Cow::Borrowed(langdisplay.unwrap_or(locale.id.language.as_str())) + if let Some(regiondn) = regiondisplay { + lqs.push(regiondn); } } + + for &variant_key in locale.id.variants.iter() { + if let Some(variant_dn) = variant_data + .get() + .names + .get(&variant_key.into_tinystr().to_unvalidated()) + { + lqs.push(variant_dn); + } + } + + return lqs; } diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs new file mode 100644 index 00000000000..4b3e413f300 --- /dev/null +++ b/experimental/displaynames/tests/tests.rs @@ -0,0 +1,49 @@ +use icu_displaynames::{DisplayNamesOptions, LocaleDisplayNamesFormatter}; +use icu_locid::locale; +use icu_locid::Locale; +use std::str::FromStr; + +#[test] +fn test_concatenate() { + #[derive(Debug)] + struct TestCase<'a> { + pub input_1: &'a Locale, + pub expected: &'a str, + } + let cases = [ + TestCase { + input_1: &locale!("de-CH"), + expected: "Swiss High German", + }, + TestCase { + input_1: &locale!("zh_Hans"), + expected: "Simplified Chinese", + }, + TestCase { + input_1: &locale!("es-419"), + expected: "Latin American Spanish", + }, + TestCase { + input_1: &locale!("es-Cyrl-MX"), + expected: "Spanish (Cyrillic, Mexico)", + }, + TestCase { + input_1: &Locale::from_str("en-Latn-GB-fonipa-scouse").unwrap(), + expected: "English (Latin, United Kingdom, IPA Phonetics, Scouse)", + }, + ]; + for cas in &cases { + // TODO: Add tests for different data locales. + let locale = locale!("en-001"); + let options: DisplayNamesOptions = Default::default(); + + let display_name = LocaleDisplayNamesFormatter::try_new_unstable( + &icu_testdata::unstable(), + &locale.into(), + options, + ) + .expect("Data should load successfully"); + + assert_eq!(display_name.of(cas.input_1), cas.expected); + } +} \ No newline at end of file From 17f19d94a12c81a555b389e113c7531031e4f96f Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 27 Jun 2023 12:08:08 -0700 Subject: [PATCH 02/13] Minor change in comment --- experimental/displaynames/src/displaynames.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 882d6fb04d2..29dcc7dd60b 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -407,6 +407,9 @@ impl LocaleDisplayNamesFormatter { ); /// Returns the display name of a locale. + /// This implementation is based on the algorithm described in + /// https://www.unicode.org/reports/tr35/tr35-general.html#locale_display_name_algorithm + /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { // Step - 1: Construct the canonical locale from a given locale. From 0141ac6a8a84865ecc072a892e53bcc10db2e3b0 Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 11 Jul 2023 12:30:03 -0700 Subject: [PATCH 03/13] Avoid constructing a new cannonical locale String. --- experimental/displaynames/Cargo.toml | 4 +++ experimental/displaynames/src/displaynames.rs | 30 ++++++++----------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/experimental/displaynames/Cargo.toml b/experimental/displaynames/Cargo.toml index 3624ed04aca..85e10e12ea3 100644 --- a/experimental/displaynames/Cargo.toml +++ b/experimental/displaynames/Cargo.toml @@ -47,3 +47,7 @@ icu_testdata = { path = "../../provider/testdata", default-features = false, fea std = ["icu_collections/std", "icu_locid/std", "icu_provider/std"] serde = ["dep:serde", "zerovec/serde", "icu_collections/serde", "tinystr/serde", "icu_provider/serde"] datagen = ["serde", "std", "dep:databake", "zerovec/databake", "icu_collections/databake", "tinystr/databake"] + +[[test]] +name = "tests" +path = "tests/tests.rs" \ No newline at end of file diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 29dcc7dd60b..ccaa4e96fbd 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -412,26 +412,19 @@ impl LocaleDisplayNamesFormatter { /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { - // Step - 1: Construct the canonical locale from a given locale. - let cannonical_locale = Locale::canonicalize(locale.to_string()).unwrap(); - let longest_prefix = - match find_longest_matching_prefix(&cannonical_locale, &self.locale_data.get().names) { + match find_longest_matching_prefix(&locale, &self.locale_data.get().names) { Some(prefix) => prefix, - None => cannonical_locale - .split_once("-") - .unwrap_or((&cannonical_locale, "")) - .0 - .to_string(), + None => locale.id.language.to_string(), }; - // Step - 2: Construct a locale display name string (LDN). + // Step - 1: Construct a locale display name string (LDN). let ldn = get_locale_display_name(&longest_prefix, &self); - // Step - 3: Construct a vector of longest qualifying substrings (LQS). - let lqs = get_longest_qualifying_substrings(&cannonical_locale, &longest_prefix, &self); + // Step - 2: Construct a vector of longest qualifying substrings (LQS). + let lqs = get_longest_qualifying_substrings(&locale, &longest_prefix, &self); - // Step - 4: Return the displayname based on the size of LQS. + // Step - 3: Return the displayname based on the size of LQS. if lqs.len() == 0 { return ldn.to_string().into(); } else { @@ -440,15 +433,16 @@ impl LocaleDisplayNamesFormatter { } } -/// For a given string and the local data, find the longest prefix of the string that exists as a key in the data. +/// For a given string and the local data, find the longest prefix of the string that exists as a key in the locale data. /// Note: this function implements a naive algorithm to find the longest matching prefix and this can be improved by either using /// Binary Search algorithm or by implementing an intermediate Trie structure if needed. /// The time complexity for this algorithm is o(n): where n is the number of words separated by "-" in "s". fn find_longest_matching_prefix<'data>( - s: &str, + locale: &Locale, data: &ZeroMap<'data, UnvalidatedStr, str>, ) -> Option { - let vector = s.split("-").collect::>(); + let binding = locale.to_string(); + let vector = &binding.split("-").collect::>(); let mut end = vector.len(); while end > 0 { @@ -533,7 +527,7 @@ fn get_locale_display_name<'a>( } fn get_longest_qualifying_substrings<'a>( - cannonical_locale: &'a str, + cannonical_locale: &Locale, language_prefix: &'a str, locale_dn_formatter: &'a LocaleDisplayNamesFormatter, ) -> Vec<&'a str> { @@ -552,7 +546,7 @@ fn get_longest_qualifying_substrings<'a>( // This step is required because locale!("en-GB-Latn") would return locale { id { language: "en", region: "GB", Script: "Latn" }, .. }. // However, because "en-GB" is a dialect and was included as part of LDN, it should ideally be locale { id { language: "en-GB", script: "Latn" }, .. }. // To resolve this case, the "language_prefix" used to compute LDN is removed first from the locale string. - let str = cannonical_locale.replace(language_prefix, ""); + let str = cannonical_locale.to_string().replace(language_prefix, ""); if str.len() == 0 { return lqs; From 4a851491d650d780304d696fd67a3feb108088bf Mon Sep 17 00:00:00 2001 From: snktd Date: Fri, 11 Aug 2023 18:09:27 -0700 Subject: [PATCH 04/13] addressing review comments --- components/locid/src/langid.rs | 98 +++++++++ experimental/displaynames/src/displaynames.rs | 207 ++++++++++-------- experimental/displaynames/tests/tests.rs | 2 +- utils/tzif/src/lib.rs | 1 - 4 files changed, 210 insertions(+), 98 deletions(-) diff --git a/components/locid/src/langid.rs b/components/locid/src/langid.rs index 78668bc0dee..2c658f9a78c 100644 --- a/components/locid/src/langid.rs +++ b/components/locid/src/langid.rs @@ -504,3 +504,101 @@ impl From<&LanguageIdentifier> (langid.language, langid.script, langid.region) } } + +/// Convert from an LS tuple to a [`LanguageIdentifier`]. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::{ +/// langid, subtags_language as language, +/// subtags_script as script, LanguageIdentifier, +/// }; +/// +/// let lang = language!("en"); +/// let script = script!("Latn"); +/// assert_eq!( +/// LanguageIdentifier::from((lang, script)), +/// langid!("en-Latn") +/// ); +/// ``` +impl From<(subtags::Language, subtags::Script)> for LanguageIdentifier { + fn from(lsr: (subtags::Language, subtags::Script)) -> Self { + Self { + language: lsr.0, + script: Some(lsr.1), + ..Default::default() + } + } +} + +/// Convert from a [`LanguageIdentifier`] to an LS tuple. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::{ +/// langid, subtags_language as language, +/// subtags_script as script, +/// }; +/// +/// let lid = langid!("en-Latn"); +/// let (lang, script) = (&lid).into(); +/// +/// assert_eq!(lang, language!("en")); +/// assert_eq!(script, script!("Latn")); +/// ``` +impl From<&LanguageIdentifier> for (subtags::Language, subtags::Script) { + fn from(langid: &LanguageIdentifier) -> Self { + (langid.language, langid.script.unwrap()) + } +} + +/// Convert from an LR tuple to a [`LanguageIdentifier`]. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::{ +/// langid, subtags_language as language, +/// subtags_region as region, LanguageIdentifier, +/// }; +/// +/// let lang = language!("en"); +/// let region = region!("GB"); +/// assert_eq!( +/// LanguageIdentifier::from((lang, region)), +/// langid!("en-GB") +/// ); +/// ``` +impl From<(subtags::Language, subtags::Region)> for LanguageIdentifier { + fn from(lsr: (subtags::Language, subtags::Region)) -> Self { + Self { + language: lsr.0, + region: Some(lsr.1), + ..Default::default() + } + } +} + +/// Convert from a [`LanguageIdentifier`] to an LR tuple. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::{ +/// langid, subtags_language as language, +/// subtags_region as region, +/// }; +/// +/// let lid = langid!("en-GB"); +/// let (lang, region) = (&lid).into(); +/// +/// assert_eq!(lang, language!("en")); +/// assert_eq!(region, region!("GB")); +/// ``` +impl From<&LanguageIdentifier> for (subtags::Language, subtags::Region) { + fn from(langid: &LanguageIdentifier) -> Self { + (langid.language, langid.region.unwrap()) + } +} diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index ccaa4e96fbd..367e06ca371 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -8,17 +8,13 @@ use crate::alloc::string::ToString; use crate::options::*; use crate::provider::*; use alloc::borrow::Cow; -use alloc::string::String; -use icu_locid::{subtags::Language, subtags::Region, subtags::Script, subtags::Variant, Locale}; -use icu_provider::prelude::*; - -use alloc::format; use alloc::vec; use alloc::vec::Vec; -use core::str::FromStr; -use tinystr::TinyAsciiStr; -use zerovec::ule::UnvalidatedStr; -use zerovec::ZeroMap; +use icu_locid::{ + subtags::Language, subtags::Region, subtags::Script, subtags::Variant, LanguageIdentifier, + Locale, +}; +use icu_provider::prelude::*; /// Lookup of the locale-specific display names by region code. /// @@ -361,6 +357,21 @@ pub struct LocaleDisplayNamesFormatter { // transforms_data: DataPayload, } +// LongestMatching subtag is a longest substring of a given locale that exists as a key in the CLDR locale data. +// This is used for implementing Locale Display Name Algorithm. +#[derive(PartialEq)] +enum LongestMatchingSubtag { + // Longest matching subtag of type ${lang}-${region}. + // Example: "de-ET", "en-GB" + LangRegion, + // Longest matching subtag of type ${lang}-${script}. + // Example: "hi-Latn", "zh-Hans" + LangScript, + // Longest matching subtag of type ${lang} + // Example: "en", "hi" + Lang, +} + impl LocaleDisplayNamesFormatter { /// Creates a new [`LocaleDisplayNamesFormatter`] from locale data and an options bag. /// @@ -409,20 +420,18 @@ impl LocaleDisplayNamesFormatter { /// Returns the display name of a locale. /// This implementation is based on the algorithm described in /// https://www.unicode.org/reports/tr35/tr35-general.html#locale_display_name_algorithm - /// + /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { - let longest_prefix = - match find_longest_matching_prefix(&locale, &self.locale_data.get().names) { - Some(prefix) => prefix, - None => locale.id.language.to_string(), - }; + let longest_matching_subtag = find_longest_matching_subtag(&locale, &self); // Step - 1: Construct a locale display name string (LDN). - let ldn = get_locale_display_name(&longest_prefix, &self); + // Find the displayname for the longest_matching_subtag which was derived above. + let ldn = get_locale_display_name(&locale, &longest_matching_subtag, &self); // Step - 2: Construct a vector of longest qualifying substrings (LQS). - let lqs = get_longest_qualifying_substrings(&locale, &longest_prefix, &self); + // Find the displayname for the remaining locale if exists. + let lqs = get_longest_qualifying_substrings(&locale, &longest_matching_subtag, &self); // Step - 3: Return the displayname based on the size of LQS. if lqs.len() == 0 { @@ -433,34 +442,45 @@ impl LocaleDisplayNamesFormatter { } } -/// For a given string and the local data, find the longest prefix of the string that exists as a key in the locale data. -/// Note: this function implements a naive algorithm to find the longest matching prefix and this can be improved by either using -/// Binary Search algorithm or by implementing an intermediate Trie structure if needed. -/// The time complexity for this algorithm is o(n): where n is the number of words separated by "-" in "s". -fn find_longest_matching_prefix<'data>( +/// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. +fn find_longest_matching_subtag<'a>( locale: &Locale, - data: &ZeroMap<'data, UnvalidatedStr, str>, -) -> Option { - let binding = locale.to_string(); - let vector = &binding.split("-").collect::>(); - let mut end = vector.len(); - - while end > 0 { - let current_prefix = vector[0..end].join("-"); + locale_dn_formatter: &'a LocaleDisplayNamesFormatter, +) -> LongestMatchingSubtag { + let LocaleDisplayNamesFormatter { locale_data, .. } = locale_dn_formatter; - if data - .get(UnvalidatedStr::from_str(current_prefix.as_str())) + // NOTE: The subtag ordering of the canonical locale is `languageᵣ_scriptᵣ_regionᵣ + variants + extensions`. + // The logic to find the longest matching subtag is based on this ordering. + if let Some(script) = locale.id.script { + let lang_script_identifier: LanguageIdentifier = (locale.id.language, script).into(); + if locale_data + .get() + .names + .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) .is_some() { - return Some(current_prefix); + return LongestMatchingSubtag::LangScript; } - end -= 1; } - return None; + if let Some(region) = locale.id.region { + if locale.id.script.is_none() { + let lang_region_identifier: LanguageIdentifier = (locale.id.language, region).into(); + if locale_data + .get() + .names + .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) + .is_some() + { + return LongestMatchingSubtag::LangRegion; + } + } + } + return LongestMatchingSubtag::Lang; } fn get_locale_display_name<'a>( - longest_prefix: &'a str, + locale: &Locale, + longest_matching_subtag: &LongestMatchingSubtag, locale_dn_formatter: &'a LocaleDisplayNamesFormatter, ) -> &'a str { let LocaleDisplayNamesFormatter { @@ -470,65 +490,69 @@ fn get_locale_display_name<'a>( .. } = locale_dn_formatter; + let lang_id: LanguageIdentifier = match *longest_matching_subtag { + LongestMatchingSubtag::LangRegion => (locale.id.language, locale.id.region.unwrap()).into(), + LongestMatchingSubtag::LangScript => (locale.id.language, locale.id.script.unwrap()).into(), + LongestMatchingSubtag::Lang => locale.id.language.into(), + }; + // Check if the key exists in the locale_data first. // Example: "en_GB", "nl_BE". let mut ldn = match options.style { Some(Style::Short) => locale_data .get() .short_names - .get(UnvalidatedStr::from_str(longest_prefix)), + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), Some(Style::Long) => locale_data .get() .long_names - .get(UnvalidatedStr::from_str(longest_prefix)), + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), Some(Style::Menu) => locale_data .get() .menu_names - .get(UnvalidatedStr::from_str(longest_prefix)), + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), _ => None, } .or_else(|| { locale_data .get() .names - .get(UnvalidatedStr::from_str(longest_prefix)) + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()) }); // At this point the key should exist in the language_data. // Example: "en", "nl", "zh". if ldn.is_none() { - let tinystr_prefix = TinyAsciiStr::from_str(&longest_prefix).unwrap(); ldn = match options.style { - // If the key is not found in locale, then search for the Some(Style::Short) => language_data .get() .short_names - .get(&tinystr_prefix.to_unvalidated()), + .get(&lang_id.language.into_tinystr().to_unvalidated()), Some(Style::Long) => language_data .get() .long_names - .get(&tinystr_prefix.to_unvalidated()), + .get(&lang_id.language.into_tinystr().to_unvalidated()), Some(Style::Menu) => language_data .get() .menu_names - .get(&tinystr_prefix.to_unvalidated()), + .get(&lang_id.language.into_tinystr().to_unvalidated()), _ => None, } .or_else(|| { language_data .get() .names - .get(&tinystr_prefix.to_unvalidated()) + .get(&lang_id.language.into_tinystr().to_unvalidated()) }); } - // Throw an error if the LDN is none as it is not possible to have a locale string without language. + // Throw an error if the LDN is none as it is not possible to have a locale string without the language. return ldn.expect("cannot parse locale displayname."); } fn get_longest_qualifying_substrings<'a>( - cannonical_locale: &Locale, - language_prefix: &'a str, + locale: &Locale, + longest_matching_subtag: &LongestMatchingSubtag, locale_dn_formatter: &'a LocaleDisplayNamesFormatter, ) -> Vec<&'a str> { let LocaleDisplayNamesFormatter { @@ -538,60 +562,51 @@ fn get_longest_qualifying_substrings<'a>( variant_data, .. } = locale_dn_formatter; - let mut lqs: Vec<&str> = vec![]; - // Examples of the "language_prefix" are: "en-GB", "en", "nl-BE", "nl". - // Based on the algorithm, the computation of LQS should omit locale/language prefix from the key. - // - // This step is required because locale!("en-GB-Latn") would return locale { id { language: "en", region: "GB", Script: "Latn" }, .. }. - // However, because "en-GB" is a dialect and was included as part of LDN, it should ideally be locale { id { language: "en-GB", script: "Latn" }, .. }. - // To resolve this case, the "language_prefix" used to compute LDN is removed first from the locale string. - let str = cannonical_locale.to_string().replace(language_prefix, ""); - - if str.len() == 0 { - return lqs; - } - - // Add "und" to the beginning to ensure that the locale string can be parsed correctly. - let locale_str_with_unknown_language = format!("und{}", &str); - let locale: Locale = Locale::from_str(&locale_str_with_unknown_language).expect("parsing locale failed"); + let mut lqs: Vec<&str> = vec![]; if let Some(script) = locale.id.script { - let scriptdisplay = match options.style { - Some(Style::Short) => script_data - .get() - .short_names - .get(&script.into_tinystr().to_unvalidated()), - _ => None, - } - .or_else(|| { - script_data - .get() - .names - .get(&script.into_tinystr().to_unvalidated()) - }); - if let Some(scriptdn) = scriptdisplay { - lqs.push(scriptdn); + // Ignore if the script was used to derive LDN. + if *longest_matching_subtag != LongestMatchingSubtag::LangScript { + let scriptdisplay = match options.style { + Some(Style::Short) => script_data + .get() + .short_names + .get(&script.into_tinystr().to_unvalidated()), + _ => None, + } + .or_else(|| { + script_data + .get() + .names + .get(&script.into_tinystr().to_unvalidated()) + }); + if let Some(scriptdn) = scriptdisplay { + lqs.push(scriptdn); + } } } if let Some(region) = locale.id.region { - let regiondisplay = match options.style { - Some(Style::Short) => region_data - .get() - .short_names - .get(®ion.into_tinystr().to_unvalidated()), - _ => None, - } - .or_else(|| { - region_data - .get() - .names - .get(®ion.into_tinystr().to_unvalidated()) - }); - - if let Some(regiondn) = regiondisplay { - lqs.push(regiondn); + // Ignore if the region was used to derive LDN. + if *longest_matching_subtag != LongestMatchingSubtag::LangRegion { + let regiondisplay = match options.style { + Some(Style::Short) => region_data + .get() + .short_names + .get(®ion.into_tinystr().to_unvalidated()), + _ => None, + } + .or_else(|| { + region_data + .get() + .names + .get(®ion.into_tinystr().to_unvalidated()) + }); + + if let Some(regiondn) = regiondisplay { + lqs.push(regiondn); + } } } diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index 4b3e413f300..14ebb20ce23 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -46,4 +46,4 @@ fn test_concatenate() { assert_eq!(display_name.of(cas.input_1), cas.expected); } -} \ No newline at end of file +} diff --git a/utils/tzif/src/lib.rs b/utils/tzif/src/lib.rs index 6bd7e023d4d..47641997aed 100644 --- a/utils/tzif/src/lib.rs +++ b/utils/tzif/src/lib.rs @@ -30,7 +30,6 @@ use data::{posix::PosixTzString, tzif::TzifData}; use error::Error; use std::fs::File; use std::path::Path; - /// The parsed data representations. pub mod data; From 19ea386b6741e825dac475864d5b1cb845a4a167 Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 15 Aug 2023 11:18:02 -0700 Subject: [PATCH 05/13] Addressing comments - round 2 --- components/locid/src/langid.rs | 98 -------------- experimental/displaynames/src/displaynames.rs | 126 ++++++++++-------- experimental/displaynames/tests/tests.rs | 5 + 3 files changed, 74 insertions(+), 155 deletions(-) diff --git a/components/locid/src/langid.rs b/components/locid/src/langid.rs index 2c658f9a78c..78668bc0dee 100644 --- a/components/locid/src/langid.rs +++ b/components/locid/src/langid.rs @@ -504,101 +504,3 @@ impl From<&LanguageIdentifier> (langid.language, langid.script, langid.region) } } - -/// Convert from an LS tuple to a [`LanguageIdentifier`]. -/// -/// # Examples -/// -/// ``` -/// use icu::locid::{ -/// langid, subtags_language as language, -/// subtags_script as script, LanguageIdentifier, -/// }; -/// -/// let lang = language!("en"); -/// let script = script!("Latn"); -/// assert_eq!( -/// LanguageIdentifier::from((lang, script)), -/// langid!("en-Latn") -/// ); -/// ``` -impl From<(subtags::Language, subtags::Script)> for LanguageIdentifier { - fn from(lsr: (subtags::Language, subtags::Script)) -> Self { - Self { - language: lsr.0, - script: Some(lsr.1), - ..Default::default() - } - } -} - -/// Convert from a [`LanguageIdentifier`] to an LS tuple. -/// -/// # Examples -/// -/// ``` -/// use icu::locid::{ -/// langid, subtags_language as language, -/// subtags_script as script, -/// }; -/// -/// let lid = langid!("en-Latn"); -/// let (lang, script) = (&lid).into(); -/// -/// assert_eq!(lang, language!("en")); -/// assert_eq!(script, script!("Latn")); -/// ``` -impl From<&LanguageIdentifier> for (subtags::Language, subtags::Script) { - fn from(langid: &LanguageIdentifier) -> Self { - (langid.language, langid.script.unwrap()) - } -} - -/// Convert from an LR tuple to a [`LanguageIdentifier`]. -/// -/// # Examples -/// -/// ``` -/// use icu::locid::{ -/// langid, subtags_language as language, -/// subtags_region as region, LanguageIdentifier, -/// }; -/// -/// let lang = language!("en"); -/// let region = region!("GB"); -/// assert_eq!( -/// LanguageIdentifier::from((lang, region)), -/// langid!("en-GB") -/// ); -/// ``` -impl From<(subtags::Language, subtags::Region)> for LanguageIdentifier { - fn from(lsr: (subtags::Language, subtags::Region)) -> Self { - Self { - language: lsr.0, - region: Some(lsr.1), - ..Default::default() - } - } -} - -/// Convert from a [`LanguageIdentifier`] to an LR tuple. -/// -/// # Examples -/// -/// ``` -/// use icu::locid::{ -/// langid, subtags_language as language, -/// subtags_region as region, -/// }; -/// -/// let lid = langid!("en-GB"); -/// let (lang, region) = (&lid).into(); -/// -/// assert_eq!(lang, language!("en")); -/// assert_eq!(region, region!("GB")); -/// ``` -impl From<&LanguageIdentifier> for (subtags::Language, subtags::Region) { - fn from(langid: &LanguageIdentifier) -> Self { - (langid.language, langid.region.unwrap()) - } -} diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 367e06ca371..64b37ecf163 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -4,10 +4,10 @@ //! This module contains types and implementations for the Displaynames component. -use crate::alloc::string::ToString; use crate::options::*; use crate::provider::*; use alloc::borrow::Cow; +use alloc::string::String; use alloc::vec; use alloc::vec::Vec; use icu_locid::{ @@ -15,7 +15,6 @@ use icu_locid::{ Locale, }; use icu_provider::prelude::*; - /// Lookup of the locale-specific display names by region code. /// /// # Example @@ -116,7 +115,6 @@ pub struct ScriptDisplayNames { script_data: DataPayload, } -#[allow(dead_code)] // not public at the moment impl ScriptDisplayNames { /// Creates a new [`ScriptDisplayNames`] from locale data and an options bag. /// @@ -193,7 +191,6 @@ pub struct VariantDisplayNames { variant_data: DataPayload, } -#[allow(dead_code)] // not public at the moment impl VariantDisplayNames { /// Creates a new [`VariantDisplayNames`] from locale data and an options bag. /// @@ -346,10 +343,8 @@ pub struct LocaleDisplayNamesFormatter { locale_data: DataPayload, language_data: DataPayload, - #[allow(dead_code)] // TODO use this script_data: DataPayload, region_data: DataPayload, - #[allow(dead_code)] // TODO add support for variants variant_data: DataPayload, // key_data: DataPayload, // measuerment_data: DataPayload, @@ -359,7 +354,7 @@ pub struct LocaleDisplayNamesFormatter { // LongestMatching subtag is a longest substring of a given locale that exists as a key in the CLDR locale data. // This is used for implementing Locale Display Name Algorithm. -#[derive(PartialEq)] +#[derive(PartialEq, Clone, Copy)] enum LongestMatchingSubtag { // Longest matching subtag of type ${lang}-${region}. // Example: "de-ET", "en-GB" @@ -372,6 +367,46 @@ enum LongestMatchingSubtag { Lang, } +impl LongestMatchingSubtag { + /// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. + pub fn find_longest_matching_subtag<'a>( + locale: &Locale, + locale_dn_formatter: &'a LocaleDisplayNamesFormatter, + ) -> Self { + let LocaleDisplayNamesFormatter { locale_data, .. } = locale_dn_formatter; + + // NOTE: The subtag ordering of the canonical locale is `language_script_region + variants + extensions`. + // The logic to find the longest matching subtag is based on this ordering. + if let Some(script) = locale.id.script { + let lang_script_identifier: LanguageIdentifier = + (locale.id.language, Some(script), None).into(); + if locale_data + .get() + .names + .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) + .is_some() + { + return LongestMatchingSubtag::LangScript; + } + } + if let Some(region) = locale.id.region { + if locale.id.script.is_none() { + let lang_region_identifier: LanguageIdentifier = + (locale.id.language, None, Some(region)).into(); + if locale_data + .get() + .names + .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) + .is_some() + { + return LongestMatchingSubtag::LangRegion; + } + } + } + return LongestMatchingSubtag::Lang; + } +} + impl LocaleDisplayNamesFormatter { /// Creates a new [`LocaleDisplayNamesFormatter`] from locale data and an options bag. /// @@ -423,64 +458,42 @@ impl LocaleDisplayNamesFormatter { /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { - let longest_matching_subtag = find_longest_matching_subtag(&locale, &self); + let longest_matching_subtag = + LongestMatchingSubtag::find_longest_matching_subtag(&locale, &self); // Step - 1: Construct a locale display name string (LDN). // Find the displayname for the longest_matching_subtag which was derived above. - let ldn = get_locale_display_name(&locale, &longest_matching_subtag, &self); + let ldn = get_locale_display_name(&locale, longest_matching_subtag, &self); // Step - 2: Construct a vector of longest qualifying substrings (LQS). // Find the displayname for the remaining locale if exists. - let lqs = get_longest_qualifying_substrings(&locale, &longest_matching_subtag, &self); + let lqs = get_longest_qualifying_substrings(&locale, longest_matching_subtag, &self); // Step - 3: Return the displayname based on the size of LQS. - if lqs.len() == 0 { - return ldn.to_string().into(); - } else { - return Cow::Owned(alloc::format!("{} ({})", ldn, lqs.join(", "))); - } - } -} - -/// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. -fn find_longest_matching_subtag<'a>( - locale: &Locale, - locale_dn_formatter: &'a LocaleDisplayNamesFormatter, -) -> LongestMatchingSubtag { - let LocaleDisplayNamesFormatter { locale_data, .. } = locale_dn_formatter; - - // NOTE: The subtag ordering of the canonical locale is `languageᵣ_scriptᵣ_regionᵣ + variants + extensions`. - // The logic to find the longest matching subtag is based on this ordering. - if let Some(script) = locale.id.script { - let lang_script_identifier: LanguageIdentifier = (locale.id.language, script).into(); - if locale_data - .get() - .names - .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) - .is_some() - { - return LongestMatchingSubtag::LangScript; - } - } - if let Some(region) = locale.id.region { - if locale.id.script.is_none() { - let lang_region_identifier: LanguageIdentifier = (locale.id.language, region).into(); - if locale_data - .get() - .names - .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) - .is_some() - { - return LongestMatchingSubtag::LangRegion; + let mut result = Cow::Borrowed(ldn); + if lqs.len() > 0 { + let mut output = String::with_capacity( + result.len() + " (".len() + lqs.iter().map(|s| ", ".len() + s.len()).sum::() + - ", ".len() + + ")".len(), + ); + output.push_str(&result); + output.push_str(" ("); + output.push_str(lqs[0]); + for lqs in &lqs[1..] { + output.push_str(", "); + output.push_str(lqs); } + output.push_str(")"); + result = Cow::Owned(output); } + result } - return LongestMatchingSubtag::Lang; } fn get_locale_display_name<'a>( locale: &Locale, - longest_matching_subtag: &LongestMatchingSubtag, + longest_matching_subtag: LongestMatchingSubtag, locale_dn_formatter: &'a LocaleDisplayNamesFormatter, ) -> &'a str { let LocaleDisplayNamesFormatter { @@ -490,9 +503,9 @@ fn get_locale_display_name<'a>( .. } = locale_dn_formatter; - let lang_id: LanguageIdentifier = match *longest_matching_subtag { - LongestMatchingSubtag::LangRegion => (locale.id.language, locale.id.region.unwrap()).into(), - LongestMatchingSubtag::LangScript => (locale.id.language, locale.id.script.unwrap()).into(), + let lang_id: LanguageIdentifier = match longest_matching_subtag { + LongestMatchingSubtag::LangRegion => (locale.id.language, None, locale.id.region).into(), + LongestMatchingSubtag::LangScript => (locale.id.language, locale.id.script, None).into(), LongestMatchingSubtag::Lang => locale.id.language.into(), }; @@ -545,14 +558,13 @@ fn get_locale_display_name<'a>( .get(&lang_id.language.into_tinystr().to_unvalidated()) }); } - // Throw an error if the LDN is none as it is not possible to have a locale string without the language. return ldn.expect("cannot parse locale displayname."); } fn get_longest_qualifying_substrings<'a>( locale: &Locale, - longest_matching_subtag: &LongestMatchingSubtag, + longest_matching_subtag: LongestMatchingSubtag, locale_dn_formatter: &'a LocaleDisplayNamesFormatter, ) -> Vec<&'a str> { let LocaleDisplayNamesFormatter { @@ -567,7 +579,7 @@ fn get_longest_qualifying_substrings<'a>( if let Some(script) = locale.id.script { // Ignore if the script was used to derive LDN. - if *longest_matching_subtag != LongestMatchingSubtag::LangScript { + if longest_matching_subtag != LongestMatchingSubtag::LangScript { let scriptdisplay = match options.style { Some(Style::Short) => script_data .get() @@ -589,7 +601,7 @@ fn get_longest_qualifying_substrings<'a>( if let Some(region) = locale.id.region { // Ignore if the region was used to derive LDN. - if *longest_matching_subtag != LongestMatchingSubtag::LangRegion { + if longest_matching_subtag != LongestMatchingSubtag::LangRegion { let regiondisplay = match options.style { Some(Style::Short) => region_data .get() diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index 14ebb20ce23..f5b79b2a4e4 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -31,6 +31,11 @@ fn test_concatenate() { input_1: &Locale::from_str("en-Latn-GB-fonipa-scouse").unwrap(), expected: "English (Latin, United Kingdom, IPA Phonetics, Scouse)", }, + // TODO: Uncomment this when the fallback is properly supported. + // TestCase { + // input_1: &locale!("xx-YY"), + // expected: "xx (YY)", + // }, ]; for cas in &cases { // TODO: Add tests for different data locales. From 4a00a0555bf76568662c58ea789a8a4f9779019a Mon Sep 17 00:00:00 2001 From: snktd Date: Mon, 21 Aug 2023 17:07:15 -0700 Subject: [PATCH 06/13] Moving methods to LocaleDisplayNamesFormatter --- experimental/displaynames/src/displaynames.rs | 315 +++++++++--------- experimental/displaynames/tests/tests.rs | 3 +- 2 files changed, 158 insertions(+), 160 deletions(-) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index e6f48d45803..6b195032a55 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -358,7 +358,7 @@ pub struct LocaleDisplayNamesFormatter { // LongestMatching subtag is a longest substring of a given locale that exists as a key in the CLDR locale data. // This is used for implementing Locale Display Name Algorithm. #[derive(PartialEq, Clone, Copy)] -enum LongestMatchingSubtag { +pub enum LongestMatchingSubtag { // Longest matching subtag of type ${lang}-${region}. // Example: "de-ET", "en-GB" LangRegion, @@ -370,46 +370,6 @@ enum LongestMatchingSubtag { Lang, } -impl LongestMatchingSubtag { - /// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. - pub fn find_longest_matching_subtag<'a>( - locale: &Locale, - locale_dn_formatter: &'a LocaleDisplayNamesFormatter, - ) -> Self { - let LocaleDisplayNamesFormatter { locale_data, .. } = locale_dn_formatter; - - // NOTE: The subtag ordering of the canonical locale is `language_script_region + variants + extensions`. - // The logic to find the longest matching subtag is based on this ordering. - if let Some(script) = locale.id.script { - let lang_script_identifier: LanguageIdentifier = - (locale.id.language, Some(script), None).into(); - if locale_data - .get() - .names - .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) - .is_some() - { - return LongestMatchingSubtag::LangScript; - } - } - if let Some(region) = locale.id.region { - if locale.id.script.is_none() { - let lang_region_identifier: LanguageIdentifier = - (locale.id.language, None, Some(region)).into(); - if locale_data - .get() - .names - .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) - .is_some() - { - return LongestMatchingSubtag::LangRegion; - } - } - } - return LongestMatchingSubtag::Lang; - } -} - impl LocaleDisplayNamesFormatter { icu_provider::gen_any_buffer_data_constructors!( locale: include, @@ -464,15 +424,15 @@ impl LocaleDisplayNamesFormatter { // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { let longest_matching_subtag = - LongestMatchingSubtag::find_longest_matching_subtag(&locale, &self); + self.find_longest_matching_subtag(&locale); // Step - 1: Construct a locale display name string (LDN). // Find the displayname for the longest_matching_subtag which was derived above. - let ldn = get_locale_display_name(&locale, longest_matching_subtag, &self); + let ldn = self.get_locale_display_name(&locale, longest_matching_subtag); // Step - 2: Construct a vector of longest qualifying substrings (LQS). // Find the displayname for the remaining locale if exists. - let lqs = get_longest_qualifying_substrings(&locale, longest_matching_subtag, &self); + let lqs = self.get_longest_qualifying_substrings(&locale, longest_matching_subtag); // Step - 3: Return the displayname based on the size of LQS. let mut result = Cow::Borrowed(ldn); @@ -494,148 +454,187 @@ impl LocaleDisplayNamesFormatter { } result } -} -fn get_locale_display_name<'a>( - locale: &Locale, - longest_matching_subtag: LongestMatchingSubtag, - locale_dn_formatter: &'a LocaleDisplayNamesFormatter, -) -> &'a str { - let LocaleDisplayNamesFormatter { - options, - locale_data, - language_data, - .. - } = locale_dn_formatter; - - let lang_id: LanguageIdentifier = match longest_matching_subtag { - LongestMatchingSubtag::LangRegion => (locale.id.language, None, locale.id.region).into(), - LongestMatchingSubtag::LangScript => (locale.id.language, locale.id.script, None).into(), - LongestMatchingSubtag::Lang => locale.id.language.into(), - }; - - // Check if the key exists in the locale_data first. - // Example: "en_GB", "nl_BE". - let mut ldn = match options.style { - Some(Style::Short) => locale_data - .get() - .short_names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), - Some(Style::Long) => locale_data - .get() - .long_names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), - Some(Style::Menu) => locale_data - .get() - .menu_names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), - _ => None, + /// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. + pub fn find_longest_matching_subtag<'a>( + &self, + locale: &Locale, + ) -> LongestMatchingSubtag { + let LocaleDisplayNamesFormatter { locale_data, .. } = self; + + // NOTE: The subtag ordering of the canonical locale is `language_script_region + variants + extensions`. + // The logic to find the longest matching subtag is based on this ordering. + if let Some(script) = locale.id.script { + let lang_script_identifier: LanguageIdentifier = + (locale.id.language, Some(script), None).into(); + if locale_data + .get() + .names + .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) + .is_some() + { + return LongestMatchingSubtag::LangScript; + } + } + if let Some(region) = locale.id.region { + if locale.id.script.is_none() { + let lang_region_identifier: LanguageIdentifier = + (locale.id.language, None, Some(region)).into(); + if locale_data + .get() + .names + .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) + .is_some() + { + return LongestMatchingSubtag::LangRegion; + } + } + } + return LongestMatchingSubtag::Lang; } - .or_else(|| { - locale_data - .get() - .names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()) - }); - - // At this point the key should exist in the language_data. - // Example: "en", "nl", "zh". - if ldn.is_none() { - ldn = match options.style { - Some(Style::Short) => language_data + + pub fn get_locale_display_name<'a>( + &'a self, + locale: &Locale, + longest_matching_subtag: LongestMatchingSubtag, + ) -> &'a str { + let LocaleDisplayNamesFormatter { + options, + locale_data, + language_data, + .. + } = self; + + let lang_id: LanguageIdentifier = match longest_matching_subtag { + LongestMatchingSubtag::LangRegion => (locale.id.language, None, locale.id.region).into(), + LongestMatchingSubtag::LangScript => (locale.id.language, locale.id.script, None).into(), + LongestMatchingSubtag::Lang => locale.id.language.into(), + }; + + // Check if the key exists in the locale_data first. + // Example: "en_GB", "nl_BE". + let mut ldn = match options.style { + Some(Style::Short) => locale_data .get() .short_names - .get(&lang_id.language.into_tinystr().to_unvalidated()), - Some(Style::Long) => language_data + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), + Some(Style::Long) => locale_data .get() .long_names - .get(&lang_id.language.into_tinystr().to_unvalidated()), - Some(Style::Menu) => language_data + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), + Some(Style::Menu) => locale_data .get() .menu_names - .get(&lang_id.language.into_tinystr().to_unvalidated()), + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), _ => None, } .or_else(|| { - language_data + locale_data .get() .names - .get(&lang_id.language.into_tinystr().to_unvalidated()) + .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()) }); - } - // Throw an error if the LDN is none as it is not possible to have a locale string without the language. - return ldn.expect("cannot parse locale displayname."); -} - -fn get_longest_qualifying_substrings<'a>( - locale: &Locale, - longest_matching_subtag: LongestMatchingSubtag, - locale_dn_formatter: &'a LocaleDisplayNamesFormatter, -) -> Vec<&'a str> { - let LocaleDisplayNamesFormatter { - options, - region_data, - script_data, - variant_data, - .. - } = locale_dn_formatter; - - let mut lqs: Vec<&str> = vec![]; - - if let Some(script) = locale.id.script { - // Ignore if the script was used to derive LDN. - if longest_matching_subtag != LongestMatchingSubtag::LangScript { - let scriptdisplay = match options.style { - Some(Style::Short) => script_data + + // At this point the key should exist in the language_data. + // Example: "en", "nl", "zh". + if ldn.is_none() { + ldn = match options.style { + Some(Style::Short) => language_data .get() .short_names - .get(&script.into_tinystr().to_unvalidated()), + .get(&lang_id.language.into_tinystr().to_unvalidated()), + Some(Style::Long) => language_data + .get() + .long_names + .get(&lang_id.language.into_tinystr().to_unvalidated()), + Some(Style::Menu) => language_data + .get() + .menu_names + .get(&lang_id.language.into_tinystr().to_unvalidated()), _ => None, } .or_else(|| { - script_data + language_data .get() .names - .get(&script.into_tinystr().to_unvalidated()) + .get(&lang_id.language.into_tinystr().to_unvalidated()) }); - if let Some(scriptdn) = scriptdisplay { - lqs.push(scriptdn); - } } + // Throw an error if the LDN is none as it is not possible to have a locale string without the language. + return ldn.expect("cannot parse locale displayname."); } - if let Some(region) = locale.id.region { - // Ignore if the region was used to derive LDN. - if longest_matching_subtag != LongestMatchingSubtag::LangRegion { - let regiondisplay = match options.style { - Some(Style::Short) => region_data - .get() - .short_names - .get(®ion.into_tinystr().to_unvalidated()), - _ => None, + fn get_longest_qualifying_substrings<'a>( + &'a self, + locale: &Locale, + longest_matching_subtag: LongestMatchingSubtag, + ) -> Vec<&'a str> { + let LocaleDisplayNamesFormatter { + options, + region_data, + script_data, + variant_data, + .. + } = self; + + let mut lqs: Vec<&str> = vec![]; + + if let Some(script) = locale.id.script { + // Ignore if the script was used to derive LDN. + if longest_matching_subtag != LongestMatchingSubtag::LangScript { + let scriptdisplay = match options.style { + Some(Style::Short) => script_data + .get() + .short_names + .get(&script.into_tinystr().to_unvalidated()), + _ => None, + } + .or_else(|| { + script_data + .get() + .names + .get(&script.into_tinystr().to_unvalidated()) + }); + if let Some(scriptdn) = scriptdisplay { + lqs.push(scriptdn); + } } - .or_else(|| { - region_data - .get() - .names - .get(®ion.into_tinystr().to_unvalidated()) - }); - - if let Some(regiondn) = regiondisplay { - lqs.push(regiondn); + } + + if let Some(region) = locale.id.region { + // Ignore if the region was used to derive LDN. + if longest_matching_subtag != LongestMatchingSubtag::LangRegion { + let regiondisplay = match options.style { + Some(Style::Short) => region_data + .get() + .short_names + .get(®ion.into_tinystr().to_unvalidated()), + _ => None, + } + .or_else(|| { + region_data + .get() + .names + .get(®ion.into_tinystr().to_unvalidated()) + }); + + if let Some(regiondn) = regiondisplay { + lqs.push(regiondn); + } } } - } - - for &variant_key in locale.id.variants.iter() { - if let Some(variant_dn) = variant_data - .get() - .names - .get(&variant_key.into_tinystr().to_unvalidated()) - { - lqs.push(variant_dn); + + for &variant_key in locale.id.variants.iter() { + if let Some(variant_dn) = variant_data + .get() + .names + .get(&variant_key.into_tinystr().to_unvalidated()) + { + lqs.push(variant_dn); + } } + + return lqs; } - - return lqs; } + diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index f5b79b2a4e4..f79d556ae90 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -42,8 +42,7 @@ fn test_concatenate() { let locale = locale!("en-001"); let options: DisplayNamesOptions = Default::default(); - let display_name = LocaleDisplayNamesFormatter::try_new_unstable( - &icu_testdata::unstable(), + let display_name = LocaleDisplayNamesFormatter::try_new( &locale.into(), options, ) From 6989da063735d74df167955aa330fe531fea49f8 Mon Sep 17 00:00:00 2001 From: snktd Date: Mon, 21 Aug 2023 19:31:11 -0700 Subject: [PATCH 07/13] Removing the unused enum --- experimental/displaynames/src/displaynames.rs | 118 ++++++++---------- experimental/displaynames/tests/tests.rs | 7 +- 2 files changed, 52 insertions(+), 73 deletions(-) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 6b195032a55..e0c30ea2bf2 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -355,21 +355,6 @@ pub struct LocaleDisplayNamesFormatter { // transforms_data: DataPayload, } -// LongestMatching subtag is a longest substring of a given locale that exists as a key in the CLDR locale data. -// This is used for implementing Locale Display Name Algorithm. -#[derive(PartialEq, Clone, Copy)] -pub enum LongestMatchingSubtag { - // Longest matching subtag of type ${lang}-${region}. - // Example: "de-ET", "en-GB" - LangRegion, - // Longest matching subtag of type ${lang}-${script}. - // Example: "hi-Latn", "zh-Hans" - LangScript, - // Longest matching subtag of type ${lang} - // Example: "en", "hi" - Lang, -} - impl LocaleDisplayNamesFormatter { icu_provider::gen_any_buffer_data_constructors!( locale: include, @@ -423,16 +408,15 @@ impl LocaleDisplayNamesFormatter { /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { - let longest_matching_subtag = - self.find_longest_matching_subtag(&locale); + let longest_matching_identifier = self.find_longest_matching_subtag(&locale); // Step - 1: Construct a locale display name string (LDN). // Find the displayname for the longest_matching_subtag which was derived above. - let ldn = self.get_locale_display_name(&locale, longest_matching_subtag); + let ldn = self.get_locale_display_name(&locale, &longest_matching_identifier); // Step - 2: Construct a vector of longest qualifying substrings (LQS). // Find the displayname for the remaining locale if exists. - let lqs = self.get_longest_qualifying_substrings(&locale, longest_matching_subtag); + let lqs = self.get_longest_qualifying_substrings(&locale, &longest_matching_identifier); // Step - 3: Return the displayname based on the size of LQS. let mut result = Cow::Borrowed(ldn); @@ -456,10 +440,7 @@ impl LocaleDisplayNamesFormatter { } /// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. - pub fn find_longest_matching_subtag<'a>( - &self, - locale: &Locale, - ) -> LongestMatchingSubtag { + pub fn find_longest_matching_subtag(&self, locale: &Locale) -> LanguageIdentifier { let LocaleDisplayNamesFormatter { locale_data, .. } = self; // NOTE: The subtag ordering of the canonical locale is `language_script_region + variants + extensions`. @@ -473,7 +454,7 @@ impl LocaleDisplayNamesFormatter { .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) .is_some() { - return LongestMatchingSubtag::LangScript; + return lang_script_identifier; } } if let Some(region) = locale.id.region { @@ -486,17 +467,17 @@ impl LocaleDisplayNamesFormatter { .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) .is_some() { - return LongestMatchingSubtag::LangRegion; + return lang_region_identifier; } } } - return LongestMatchingSubtag::Lang; + return (locale.id.language, None, None).into(); } - pub fn get_locale_display_name<'a>( + fn get_locale_display_name<'a>( &'a self, - locale: &Locale, - longest_matching_subtag: LongestMatchingSubtag, + locale: &'a Locale, + longest_matching_identifier: &LanguageIdentifier, ) -> &'a str { let LocaleDisplayNamesFormatter { options, @@ -504,70 +485,72 @@ impl LocaleDisplayNamesFormatter { language_data, .. } = self; - - let lang_id: LanguageIdentifier = match longest_matching_subtag { - LongestMatchingSubtag::LangRegion => (locale.id.language, None, locale.id.region).into(), - LongestMatchingSubtag::LangScript => (locale.id.language, locale.id.script, None).into(), - LongestMatchingSubtag::Lang => locale.id.language.into(), - }; - + // Check if the key exists in the locale_data first. // Example: "en_GB", "nl_BE". let mut ldn = match options.style { Some(Style::Short) => locale_data .get() .short_names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), + .get_by(|uvstr| longest_matching_identifier.strict_cmp(uvstr).reverse()), Some(Style::Long) => locale_data .get() .long_names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), + .get_by(|uvstr| longest_matching_identifier.strict_cmp(uvstr).reverse()), Some(Style::Menu) => locale_data .get() .menu_names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()), + .get_by(|uvstr| longest_matching_identifier.strict_cmp(uvstr).reverse()), _ => None, } .or_else(|| { locale_data .get() .names - .get_by(|uvstr| lang_id.strict_cmp(uvstr).reverse()) + .get_by(|uvstr| longest_matching_identifier.strict_cmp(uvstr).reverse()) }); - + // At this point the key should exist in the language_data. // Example: "en", "nl", "zh". if ldn.is_none() { ldn = match options.style { - Some(Style::Short) => language_data - .get() - .short_names - .get(&lang_id.language.into_tinystr().to_unvalidated()), - Some(Style::Long) => language_data - .get() - .long_names - .get(&lang_id.language.into_tinystr().to_unvalidated()), - Some(Style::Menu) => language_data - .get() - .menu_names - .get(&lang_id.language.into_tinystr().to_unvalidated()), + Some(Style::Short) => language_data.get().short_names.get( + &longest_matching_identifier + .language + .into_tinystr() + .to_unvalidated(), + ), + Some(Style::Long) => language_data.get().long_names.get( + &longest_matching_identifier + .language + .into_tinystr() + .to_unvalidated(), + ), + Some(Style::Menu) => language_data.get().menu_names.get( + &longest_matching_identifier + .language + .into_tinystr() + .to_unvalidated(), + ), _ => None, } .or_else(|| { - language_data - .get() - .names - .get(&lang_id.language.into_tinystr().to_unvalidated()) + language_data.get().names.get( + &longest_matching_identifier + .language + .into_tinystr() + .to_unvalidated(), + ) }); } // Throw an error if the LDN is none as it is not possible to have a locale string without the language. - return ldn.expect("cannot parse locale displayname."); + return ldn.unwrap_or(locale.id.language.as_str()); } fn get_longest_qualifying_substrings<'a>( &'a self, locale: &Locale, - longest_matching_subtag: LongestMatchingSubtag, + longest_matching_identifier: &LanguageIdentifier, ) -> Vec<&'a str> { let LocaleDisplayNamesFormatter { options, @@ -576,12 +559,12 @@ impl LocaleDisplayNamesFormatter { variant_data, .. } = self; - + let mut lqs: Vec<&str> = vec![]; - + if let Some(script) = locale.id.script { // Ignore if the script was used to derive LDN. - if longest_matching_subtag != LongestMatchingSubtag::LangScript { + if longest_matching_identifier.script.is_none() { let scriptdisplay = match options.style { Some(Style::Short) => script_data .get() @@ -600,10 +583,10 @@ impl LocaleDisplayNamesFormatter { } } } - + if let Some(region) = locale.id.region { // Ignore if the region was used to derive LDN. - if longest_matching_subtag != LongestMatchingSubtag::LangRegion { + if longest_matching_identifier.region.is_none() { let regiondisplay = match options.style { Some(Style::Short) => region_data .get() @@ -617,13 +600,13 @@ impl LocaleDisplayNamesFormatter { .names .get(®ion.into_tinystr().to_unvalidated()) }); - + if let Some(regiondn) = regiondisplay { lqs.push(regiondn); } } } - + for &variant_key in locale.id.variants.iter() { if let Some(variant_dn) = variant_data .get() @@ -633,8 +616,7 @@ impl LocaleDisplayNamesFormatter { lqs.push(variant_dn); } } - + return lqs; } } - diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index f79d556ae90..2b91b15d6f6 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -42,11 +42,8 @@ fn test_concatenate() { let locale = locale!("en-001"); let options: DisplayNamesOptions = Default::default(); - let display_name = LocaleDisplayNamesFormatter::try_new( - &locale.into(), - options, - ) - .expect("Data should load successfully"); + let display_name = LocaleDisplayNamesFormatter::try_new(&locale.into(), options) + .expect("Data should load successfully"); assert_eq!(display_name.of(cas.input_1), cas.expected); } From 6855cebe1f232223355422295d8db5fb8d073cb7 Mon Sep 17 00:00:00 2001 From: snktd Date: Mon, 21 Aug 2023 19:59:02 -0700 Subject: [PATCH 08/13] Adding a few more test cases --- experimental/displaynames/tests/tests.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index 2b91b15d6f6..85d64926ad5 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -31,6 +31,18 @@ fn test_concatenate() { input_1: &Locale::from_str("en-Latn-GB-fonipa-scouse").unwrap(), expected: "English (Latin, United Kingdom, IPA Phonetics, Scouse)", }, + TestCase { + input_1: &Locale::from_str("de-Latn-CH").unwrap(), + expected: "German (Latin, Switzerland)", + }, + TestCase { + input_1: &Locale::from_str("zh-Hans-CN").unwrap(), + expected: "Simplified Chinese (China)", + }, + TestCase { + input_1: &Locale::from_str("es-419-fonipa").unwrap(), + expected: "Latin American Spanish (IPA Phonetics)", + }, // TODO: Uncomment this when the fallback is properly supported. // TestCase { // input_1: &locale!("xx-YY"), From 0f7c4acf37f0f5c7f980d405dd088bec44215684 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Tue, 22 Aug 2023 10:56:59 +0200 Subject: [PATCH 09/13] fallback --- experimental/displaynames/src/displaynames.rs | 38 +++++++++---------- experimental/displaynames/tests/tests.rs | 9 ++--- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index e0c30ea2bf2..7b7f1281eb7 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -339,6 +339,8 @@ impl LanguageDisplayNames { /// assert_eq!(display_name.of(&locale!("en-GB")), "British English"); /// assert_eq!(display_name.of(&locale!("en")), "English"); /// assert_eq!(display_name.of(&locale!("en-MX")), "English (Mexico)"); +/// assert_eq!(display_name.of(&locale!("xx-YY")), "xx (YY)"); +/// assert_eq!(display_name.of(&locale!("xx")), "xx"); /// ``` pub struct LocaleDisplayNamesFormatter { options: DisplayNamesOptions, @@ -350,7 +352,7 @@ pub struct LocaleDisplayNamesFormatter { region_data: DataPayload, variant_data: DataPayload, // key_data: DataPayload, - // measuerment_data: DataPayload, + // measurement_data: DataPayload, // subdivisions_data: DataPayload, // transforms_data: DataPayload, } @@ -549,8 +551,8 @@ impl LocaleDisplayNamesFormatter { fn get_longest_qualifying_substrings<'a>( &'a self, - locale: &Locale, - longest_matching_identifier: &LanguageIdentifier, + locale: &'a Locale, + longest_matching_identifier: &'a LanguageIdentifier, ) -> Vec<&'a str> { let LocaleDisplayNamesFormatter { options, @@ -560,9 +562,9 @@ impl LocaleDisplayNamesFormatter { .. } = self; - let mut lqs: Vec<&str> = vec![]; + let mut lqs: Vec<&'a str> = vec![]; - if let Some(script) = locale.id.script { + if let Some(script) = &locale.id.script { // Ignore if the script was used to derive LDN. if longest_matching_identifier.script.is_none() { let scriptdisplay = match options.style { @@ -578,13 +580,11 @@ impl LocaleDisplayNamesFormatter { .names .get(&script.into_tinystr().to_unvalidated()) }); - if let Some(scriptdn) = scriptdisplay { - lqs.push(scriptdn); - } + lqs.push(scriptdisplay.unwrap_or(script.as_str())); } } - if let Some(region) = locale.id.region { + if let Some(region) = &locale.id.region { // Ignore if the region was used to derive LDN. if longest_matching_identifier.region.is_none() { let regiondisplay = match options.style { @@ -601,20 +601,18 @@ impl LocaleDisplayNamesFormatter { .get(®ion.into_tinystr().to_unvalidated()) }); - if let Some(regiondn) = regiondisplay { - lqs.push(regiondn); - } + lqs.push(regiondisplay.unwrap_or(region.as_str())); } } - for &variant_key in locale.id.variants.iter() { - if let Some(variant_dn) = variant_data - .get() - .names - .get(&variant_key.into_tinystr().to_unvalidated()) - { - lqs.push(variant_dn); - } + for variant_key in locale.id.variants.iter() { + lqs.push( + variant_data + .get() + .names + .get(&variant_key.into_tinystr().to_unvalidated()) + .unwrap_or(variant_key.as_str()), + ); } return lqs; diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index 85d64926ad5..ad9bbe96c8f 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -43,11 +43,10 @@ fn test_concatenate() { input_1: &Locale::from_str("es-419-fonipa").unwrap(), expected: "Latin American Spanish (IPA Phonetics)", }, - // TODO: Uncomment this when the fallback is properly supported. - // TestCase { - // input_1: &locale!("xx-YY"), - // expected: "xx (YY)", - // }, + TestCase { + input_1: &locale!("xx-YY"), + expected: "xx (YY)", + }, ]; for cas in &cases { // TODO: Add tests for different data locales. From 94b311a46bd15bedb360d326c36ed12d9a38f1cd Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 22 Aug 2023 09:43:16 -0700 Subject: [PATCH 10/13] Address comments --- experimental/displaynames/src/displaynames.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 7b7f1281eb7..5da32489665 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -443,14 +443,13 @@ impl LocaleDisplayNamesFormatter { /// For a given locale and the data, find the longest prefix of the string that exists as a key in the CLDR locale data. pub fn find_longest_matching_subtag(&self, locale: &Locale) -> LanguageIdentifier { - let LocaleDisplayNamesFormatter { locale_data, .. } = self; - // NOTE: The subtag ordering of the canonical locale is `language_script_region + variants + extensions`. // The logic to find the longest matching subtag is based on this ordering. if let Some(script) = locale.id.script { let lang_script_identifier: LanguageIdentifier = (locale.id.language, Some(script), None).into(); - if locale_data + if self + .locale_data .get() .names .get_by(|uvstr| lang_script_identifier.strict_cmp(uvstr).reverse()) @@ -463,7 +462,8 @@ impl LocaleDisplayNamesFormatter { if locale.id.script.is_none() { let lang_region_identifier: LanguageIdentifier = (locale.id.language, None, Some(region)).into(); - if locale_data + if self + .locale_data .get() .names .get_by(|uvstr| lang_region_identifier.strict_cmp(uvstr).reverse()) @@ -545,7 +545,7 @@ impl LocaleDisplayNamesFormatter { ) }); } - // Throw an error if the LDN is none as it is not possible to have a locale string without the language. + // Fallback on language subtag in LanguageIdentifier id the key is not found in CLDR data. return ldn.unwrap_or(locale.id.language.as_str()); } From 2bc861b3d5da6fc523a091bf08bfa653c83c0ca7 Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 22 Aug 2023 14:24:43 -0700 Subject: [PATCH 11/13] Running ci-job-tidyi --- experimental/displaynames/tests/tests.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index ad9bbe96c8f..0fdf5c9efa3 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -1,3 +1,7 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + use icu_displaynames::{DisplayNamesOptions, LocaleDisplayNamesFormatter}; use icu_locid::locale; use icu_locid::Locale; From fed16225e46fecea097a4ddfc0f845d34fc21f12 Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 22 Aug 2023 16:05:04 -0700 Subject: [PATCH 12/13] Making clippy happy and adding one more test case. --- experimental/displaynames/src/displaynames.rs | 18 +++++++++--------- experimental/displaynames/tests/tests.rs | 4 ++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 5da32489665..9d11615b12e 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -406,23 +406,24 @@ impl LocaleDisplayNamesFormatter { /// Returns the display name of a locale. /// This implementation is based on the algorithm described in - /// https://www.unicode.org/reports/tr35/tr35-general.html#locale_display_name_algorithm + /// `` /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> { - let longest_matching_identifier = self.find_longest_matching_subtag(&locale); + let longest_matching_identifier = self.find_longest_matching_subtag(locale); // Step - 1: Construct a locale display name string (LDN). // Find the displayname for the longest_matching_subtag which was derived above. - let ldn = self.get_locale_display_name(&locale, &longest_matching_identifier); + let ldn = self.get_locale_display_name(locale, &longest_matching_identifier); // Step - 2: Construct a vector of longest qualifying substrings (LQS). // Find the displayname for the remaining locale if exists. - let lqs = self.get_longest_qualifying_substrings(&locale, &longest_matching_identifier); + let lqs = self.get_longest_qualifying_substrings(locale, &longest_matching_identifier); // Step - 3: Return the displayname based on the size of LQS. let mut result = Cow::Borrowed(ldn); - if lqs.len() > 0 { + #[allow(clippy::indexing_slicing)] // indexes in range + if !lqs.is_empty() { let mut output = String::with_capacity( result.len() + " (".len() + lqs.iter().map(|s| ", ".len() + s.len()).sum::() - ", ".len() @@ -435,7 +436,7 @@ impl LocaleDisplayNamesFormatter { output.push_str(", "); output.push_str(lqs); } - output.push_str(")"); + output.push(')'); result = Cow::Owned(output); } result @@ -473,7 +474,7 @@ impl LocaleDisplayNamesFormatter { } } } - return (locale.id.language, None, None).into(); + (locale.id.language, None, None).into() } fn get_locale_display_name<'a>( @@ -614,7 +615,6 @@ impl LocaleDisplayNamesFormatter { .unwrap_or(variant_key.as_str()), ); } - - return lqs; + lqs } } diff --git a/experimental/displaynames/tests/tests.rs b/experimental/displaynames/tests/tests.rs index 0fdf5c9efa3..e85b30a22c7 100644 --- a/experimental/displaynames/tests/tests.rs +++ b/experimental/displaynames/tests/tests.rs @@ -47,6 +47,10 @@ fn test_concatenate() { input_1: &Locale::from_str("es-419-fonipa").unwrap(), expected: "Latin American Spanish (IPA Phonetics)", }, + TestCase { + input_1: &Locale::from_str("es-Latn-419").unwrap(), + expected: "Spanish (Latin, Latin America)", + }, TestCase { input_1: &locale!("xx-YY"), expected: "xx (YY)", From a1d9fdccb39d570d3b17a0f6d43b39b5b7b8a5e7 Mon Sep 17 00:00:00 2001 From: snktd Date: Tue, 22 Aug 2023 18:27:42 -0700 Subject: [PATCH 13/13] Addressing minor nit comment --- experimental/displaynames/src/displaynames.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/displaynames/src/displaynames.rs b/experimental/displaynames/src/displaynames.rs index 9d11615b12e..47111428638 100644 --- a/experimental/displaynames/src/displaynames.rs +++ b/experimental/displaynames/src/displaynames.rs @@ -406,7 +406,7 @@ impl LocaleDisplayNamesFormatter { /// Returns the display name of a locale. /// This implementation is based on the algorithm described in - /// `` + /// /// // TODO: Make this return a writeable instead of using alloc pub fn of<'a, 'b: 'a, 'c: 'a>(&'b self, locale: &'c Locale) -> Cow<'a, str> {