unicode-org · hsivonen · Jan 18, 2024 · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024
@@ -9,6 +9,9 @@
     - New `DateTime::local_unix_epoch()` convenience constructor (https://github.com/unicode-org/icu4x/pull/4479)
   - `icu_datetime`
     - `FormattedDateTime` and `FormattedZonedDateTime` now implement `Clone` and `Copy` (https://github.com/unicode-org/icu4x/pull/4476)
+  - `icu_normalizer`
+    - Fix normalization of a character whose decomposition contains more than one starter and ends with a non-starter, when that character is followed by
+      a non-starter with a lower Canonical Combining Class than the last character of the decomposition. (https://github.com/unicode-org/icu4x/pull/4530)
   - `icu_properties`
     - Add `Aran` script code (https://github.com/unicode-org/icu4x/pull/4426)
   - `icu_segmenter`
@@ -51,7 +54,7 @@
 
 - General
   - MSRV is now 1.67
- 
+
 - Components
     - Compiled data updated to CLDR 44 and ICU 74 (https://github.com/unicode-org/icu4x/pull/4245)
     - `icu_calendar`

@@ -637,7 +637,7 @@ where
                 i += 1;
                 // Half-width kana and iota subscript don't occur in the tails
                 // of these multicharacter decompositions.
-                if decomposition_starts_with_non_starter(trie_value) {
+                if !decomposition_starts_with_non_starter(trie_value) {
                     combining_start = i;
                 }
             }
@@ -676,7 +676,7 @@ where
                 i += 1;
                 // Half-width kana and iota subscript don't occur in the tails
                 // of these multicharacter decompositions.
-                if decomposition_starts_with_non_starter(trie_value) {
+                if !decomposition_starts_with_non_starter(trie_value) {
                     combining_start = i;
                 }
             }

@@ -1308,6 +1308,28 @@ fn test_utf16_basic() {
     );
 }
 
+#[test] // NOTE(review): presumably the regression test for the `icu_normalizer` changelog entry in this patch — confirm.
+fn test_accented_digraph() { // Checks NFKD output for two spellings that must normalize to the same string.
+    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
+    assert_eq!(
+        normalizer.normalize("\u{01C4}\u{0323}"), // Input: the single code point U+01C4 followed by U+0323.
+        "DZ\u{0323}\u{030C}" // Expected: "DZ" with U+0323 ordered before U+030C.
+    );
+    assert_eq!(
+        normalizer.normalize("DZ\u{030C}\u{0323}"), // Input: already spelled out, marks in the opposite order.
+        "DZ\u{0323}\u{030C}" // Expected: the same string as in the first assertion.
+    );
+}
+
+#[test] // NOTE(review): presumably a second regression case for the same normalizer fix, using NFD — confirm.
+fn test_ddd() { // Checks NFD output for U+0DDD followed by one extra mark.
+    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
+    assert_eq!(
+        normalizer.normalize("\u{0DDD}\u{0334}"), // Input: the single code point U+0DDD followed by U+0334.
+        "\u{0DD9}\u{0DCF}\u{0334}\u{0DCA}" // Expected: U+0334 placed before the trailing U+0DCA, not after it.
+    );
+}
+
 #[test]
 fn test_is_normalized() {
     let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();