From ecc4b3d81b75a5fe3a4911eb1cc68c43fdae1e2d Mon Sep 17 00:00:00 2001 From: Alfan Nur Fauzan Date: Thu, 21 Mar 2024 06:39:38 +0700 Subject: [PATCH] transliteration improvement for v0.11.3 (#38) * add more test cases * adjust the transliteration * handle "kitabi la roiba" * compress contextual_map patterns * remove some test cases * add test cases * bump version --- Cargo.toml | 2 +- src/quranize.rs | 42 +++++++++++++++++++++++++------- src/quranize/transliterations.rs | 17 ++++--------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ddd941b..f1a50f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "quranize" -version = "0.11.2" +version = "0.11.3" authors = ["Alfan Nur Fauzan "] edition = "2021" description = "Encoding transliterations into Quran forms." diff --git a/src/quranize.rs b/src/quranize.rs index e9e8aeb..83e1420 100644 --- a/src/quranize.rs +++ b/src/quranize.rs @@ -239,6 +239,8 @@ mod tests { assert_eq!(q.e("yukhodiun"), vec!["يُخٰدِعونَ"]); assert_eq!(q.e("indallah"), vec!["عِندَ اللَّهِ", "عِندِ اللَّهِ"]); assert_eq!(q.e("alimul ghoibi"), vec!["عٰلِمُ الغَيبِ"]); + assert_eq!(q.e("kaana dhoifa"), vec!["كانَ ضَعيفًا"]); + assert_eq!(q.e("kitabi la roiba"), vec!["الكِتٰبِ لا رَيبَ"]); } #[test] @@ -290,10 +292,7 @@ mod tests { #[test] fn test_quranize_misc() { let q = Quranize::new(70); - assert_eq!(q.encode("bismillah")[0].1.len(), 13); assert_eq!(q.encode("bismillah")[0].2, 3); - assert_eq!(q.encode("arrohman").len(), 3); - assert_eq!(q.encode("arrohman")[0].1.len(), 10); assert_eq!( q.encode("alhamdu")[0].1, vec!["a", "l", "h", "a", "m", "d", "u"] @@ -302,12 +301,37 @@ mod tests { q.encode("arrohman")[0].1, vec!["a", "", "r", "r", "o", "h", "m", "a", "n", ""] ); - let result = &q.encode("masyaallah")[0]; - assert_eq!(result.0.chars().count(), result.1.len()); - assert_eq!( - result.1, - vec!["m", "a", "", "sy", "a", "a", "", "", "", "", "l", "l", "a", "h", ""] - ); + { + let r = &q.encode("masyaallah")[0]; + assert_eq!(r.0.chars().count(), r.1.len()); + assert_eq!( + r.1, + vec!["m", "a", "", "sy", "a", "a", "", "", "", "", "l", "l", "a", "h", ""] + ); + } + { + let r = &q.encode("birobbinnas")[0]; + assert_eq!( + r.1.iter().zip(r.0.chars()).collect::>(), + vec![ + (&"b", 'ب',), + (&"i", '\u{650}',), + (&"r", 'ر',), + (&"o", '\u{64e}',), + (&"b", 'ب',), + (&"b", '\u{651}',), + (&"i", '\u{650}',), + (&"", ' ',), + (&"", 'ا',), + (&"", 'ل',), + (&"n", 'ن',), + (&"n", '\u{651}',), + (&"a", 'ا',), + (&"s", 'س',), + (&"", '\u{650}',), + ] + ); + } } #[test] diff --git a/src/quranize/transliterations.rs b/src/quranize/transliterations.rs index 6717d9d..8de34ef 100644 --- a/src/quranize/transliterations.rs +++ b/src/quranize/transliterations.rs @@ -64,18 +64,11 @@ pub(super) fn map(c: char) -> &'static [&'static str] { pub(super) fn contextual_map(c0: char, c1: char) -> &'static [&'static str] { match (c0, c1) { - (SPACE, LETTER_ALEF) - | (LETTER_HAMZA, LETTER_ALEF) - | (LETTER_ALEF, LETTER_LAM) - | (LETTER_AIN, LETTER_WAW) - | (LETTER_AIN, LETTER_SUPERSCRIPT_ALEF) - | (LETTER_WAW, LETTER_ALEF) - | (FATHATAN, LETTER_ALEF) - | (DAMMA, LETTER_WAW) - | (KASRA, LETTER_ALEF) - | (KASRA, LETTER_LAM) - | (HAMZA_ABOVE, LETTER_ALEF) => &[""], - ('\0', LETTER_ALEF) => &["u", "i"], + (SPACE | LETTER_HAMZA | LETTER_WAW | FATHATAN | KASRA | HAMZA_ABOVE, LETTER_ALEF) + | (LETTER_ALEF | KASRA, LETTER_LAM) + | (LETTER_AIN, LETTER_WAW | LETTER_YEH | LETTER_SUPERSCRIPT_ALEF) + | (DAMMA, LETTER_WAW) => &[""], + ('\0', LETTER_ALEF) => &["u", "i", ""], (_, SHADDA) => map(c0), _ => &[], }