diff --git a/CHANGELOG.md b/CHANGELOG.md index 86d8a9ff5b3..e6875ad8976 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ - New `DateTime::local_unix_epoch()` convenience constructor (https://github.com/unicode-org/icu4x/pull/4479) - `icu_datetime` - `FormattedDateTime` and `FormattedZonedDateTime` now implement `Clone` and `Copy` (https://github.com/unicode-org/icu4x/pull/4476) + - `icu_normalizer` + - Fix normalization of character whose decomposition contains more than one starter and ends with a non-starter followed by a non-starter + with a lower Canonical Combining Class than the last character of the decomposition. (https://github.com/unicode-org/icu4x/pull/4530) - `icu_properties` - Add `Aran` script code (https://github.com/unicode-org/icu4x/pull/4426) - `icu_segmenter` @@ -51,7 +54,7 @@ - General - MSRV is now 1.67 - + - Components - Compiled data updated to CLDR 44 and ICU 74 (https://github.com/unicode-org/icu4x/pull/4245) - `icu_calendar` diff --git a/components/normalizer/src/lib.rs b/components/normalizer/src/lib.rs index 067b15d933f..9d71e72287a 100644 --- a/components/normalizer/src/lib.rs +++ b/components/normalizer/src/lib.rs @@ -637,7 +637,7 @@ where i += 1; // Half-width kana and iota subscript don't occur in the tails // of these multicharacter decompositions. - if decomposition_starts_with_non_starter(trie_value) { + if !decomposition_starts_with_non_starter(trie_value) { combining_start = i; } } @@ -676,7 +676,7 @@ where i += 1; // Half-width kana and iota subscript don't occur in the tails // of these multicharacter decompositions. - if decomposition_starts_with_non_starter(trie_value) { + if !decomposition_starts_with_non_starter(trie_value) { combining_start = i; } } diff --git a/components/normalizer/tests/tests.rs b/components/normalizer/tests/tests.rs index d95486e5e13..f6d1a1e49c5 100644 --- a/components/normalizer/tests/tests.rs +++ b/components/normalizer/tests/tests.rs @@ -1308,6 +1308,28 @@ fn test_utf16_basic() { ); } +#[test] +fn test_accented_digraph() { + let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd(); + assert_eq!( + normalizer.normalize("\u{01C4}\u{0323}"), + "DZ\u{0323}\u{030C}" + ); + assert_eq!( + normalizer.normalize("DZ\u{030C}\u{0323}"), + "DZ\u{0323}\u{030C}" + ); +} + +#[test] +fn test_ddd() { + let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd(); + assert_eq!( + normalizer.normalize("\u{0DDD}\u{0334}"), + "\u{0DD9}\u{0DCF}\u{0334}\u{0DCA}" + ); +} + #[test] fn test_is_normalized() { let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();