diff --git a/Cargo.toml b/Cargo.toml index db0bb56..14a3b2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "quranize" -version = "0.10.4" +version = "0.10.5" authors = ["Alfan Nur Fauzan "] edition = "2021" description = "Encoding transliterations into Quran forms." diff --git a/src/lib.rs b/src/lib.rs index 5fbc67d..9c4c74f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ //! //! ``` //! let q = quranize::Quranize::default(); -//! assert_eq!(q.encode("alhamdulillah").first().unwrap().0, "الحمد لله"); +//! assert_eq!(q.encode("alhamdulillah").first().unwrap().0, "الحمد للّه"); //! ``` //! //! ## Getting an aya text given surah number and ayah number @@ -60,7 +60,7 @@ impl Default for Quranize { /// /// ``` /// let q = quranize::Quranize::default(); // the same with `Quranize::new(usize::MAX)` - /// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء الله"); + /// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء اللّه"); /// ``` fn default() -> Self { Self::new(usize::MAX) @@ -77,7 +77,7 @@ impl Quranize { /// /// ``` /// let q = quranize::Quranize::new(35); - /// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء الله"); + /// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء اللّه"); /// let q = quranize::Quranize::new(1); /// assert_eq!(q.encode("masyaallah").first(), None); /// ``` @@ -215,11 +215,16 @@ impl Quranize { #[cfg(test)] mod tests { + use crate::transliterations::TASYDID; + use super::*; impl Quranize { fn e(&self, text: &str) -> Vec { - self.encode(text).into_iter().map(|(q, _, _)| q).collect() + self.encode(text) + .into_iter() + .map(|(q, _, _)| q.chars().filter(|&c| c != TASYDID).collect()) + .collect() } } @@ -235,7 +240,7 @@ mod tests { assert_eq!(q.e("birobbinnas"), vec!["برب الناس"]); assert_eq!(q.e("inna anzalnahu"), vec!["إنا أنزلناه"]); assert_eq!(q.e("wa'tasimu"), vec!["واعتصموا"]); - assert_eq!(q.e("wabarro"), vec!["وبرا", "وبئر"]); + assert_eq!(q.e("wabarro"), vec!["وبرا"]); assert_eq!(q.e("idza qodho"), vec!["إذا قضى"]); assert_eq!(q.e("masyaallah"), vec!["ما شاء الله"]); assert_eq!(q.e("illa man taaba"), vec!["إلا من تاب"]); @@ -248,6 +253,7 @@ mod tests { assert_eq!(q.e("undur kaifa"), vec!["انظر كيف"]); assert_eq!(q.e("lirrohman"), vec!["للرحمن"]); assert_eq!(q.e("wantum muslimun"), vec!["وأنتم مسلمون"]); + assert_eq!(q.e("laa yukallifullah"), vec!["لا يكلف الله"]); assert_eq!( q.e("bismillahirrohmanirrohiim"), @@ -279,26 +285,26 @@ mod tests { assert_eq!(q.e("kaaaf haa yaa aiiin shoood"), vec!["كهيعص"]); assert_eq!(q.e("kaf ha ya 'ain shod"), vec!["كهيعص"]); - assert_eq!(q.locations_index.len(), 685_770); + assert_eq!(q.locations_index.len(), 686_059); } #[test] fn test_quranize_misc() { let q = Quranize::new(23); - assert_eq!(q.encode("bismillah")[0].1.len(), 8); + assert_eq!(q.encode("bismillah")[0].1.len(), 9); assert_eq!(q.encode("bismillah")[0].2, 3); assert_eq!(q.encode("arrohman").len(), 1); - assert_eq!(q.encode("arrohman")[0].1.len(), 6); + assert_eq!(q.encode("arrohman")[0].1.len(), 7); assert_eq!(q.encode("alhamdu")[0].1, vec!["al", "ha", "m", "du"]); assert_eq!( q.encode("arrohman")[0].1, - vec!["a", "", "ro", "h", "ma", "n"] + vec!["a", "", "r", "ro", "h", "ma", "n"] ); let result = &q.encode("masyaallah")[0]; assert_eq!(result.0.chars().count(), result.1.len()); assert_eq!( result.1, - vec!["m", "a", "", "sy", "a", "a", "", "", "l", "la", "h"] + vec!["m", "a", "", "sy", "a", "a", "", "", "", "l", "la", "h"] ); } @@ -306,7 +312,6 @@ mod tests { fn test_quranize_empty_result() { let q = Quranize::new(23); assert!(q.encode("").is_empty()); - assert!(q.encode("aaa").is_empty()); assert!(q.encode("abcd").is_empty()); assert!(q.encode("1+2=3").is_empty()); } @@ -327,8 +332,8 @@ mod tests { fn test_locate() { let q = Quranize::new(23); assert_eq!(q.get_locations("بسم").first(), Some(&(1, 1, 1))); - assert_eq!(q.get_locations("والناس").last(), Some(&(114, 6, 3))); - assert_eq!(q.get_locations("بسم الله الرحمن الرحيم").len(), 2); + assert_eq!(q.get_locations("والنّاس").last(), Some(&(114, 6, 3))); + assert_eq!(q.get_locations("بسم اللّه الرّحمن الرّحيم").len(), 2); assert_eq!(q.get_locations("ن").first(), Some(&(68, 1, 1))); assert!(q.get_locations("").is_empty()); assert!(q.get_locations("نن").is_empty()); diff --git a/src/normalization.rs b/src/normalization.rs index 95cb407..7a13991 100644 --- a/src/normalization.rs +++ b/src/normalization.rs @@ -1,11 +1,8 @@ pub(crate) fn normalize(text: &str) -> String { - let mut chars = Vec::from_iter(text.chars().filter_map(|c| match c.to_ascii_lowercase() { + let chars = Vec::from_iter(text.chars().filter_map(|c| match c.to_ascii_lowercase() { c @ ('a'..='z' | '\'' | ' ') => Some(c), _ => None, })); - chars.dedup_by(|&mut a, &mut b| { - a == b && !(a == 'l' || a == 'a' || a == 'o' || a == 'i' || a == 'u') - }); chars.into_iter().filter(|&c| c != ' ').collect() } @@ -28,8 +25,8 @@ mod tests { assert_eq!(normalize("'aalimul ghoibi"), "'aalimulghoibi"); assert_eq!(normalize("Qul A'udzu"), "qula'udzu"); assert_eq!( - normalize("bismilla hirrohman nirrohiim"), - "bismillahirohmannirohiim" + normalize("bismilla hirrohma nirrohiim"), + "bismillahirrohmanirrohiim" ); } diff --git a/src/quran/mod.rs b/src/quran/mod.rs index 18afaf0..ee4a170 100644 --- a/src/quran/mod.rs +++ b/src/quran/mod.rs @@ -8,7 +8,7 @@ use simple_plain::RAW_QURAN as SIMPLE_PLAIN; const SURA_COUNT: usize = 114; const AYA_COUNT: usize = 6236; -/// Accept raw Quran string, return an iterator for each ayah in the Quran with surah number and ayah number. +/// Returns an iterator of `(sura_number, aya_number, aya_text)` that iterates each ayah in the Quran. pub(crate) fn iter() -> impl Iterator { iter_quran(SIMPLE_PLAIN) } @@ -33,10 +33,11 @@ pub(crate) trait CleanCharsExt { fn clean_chars(&self) -> Filter bool>; } -use crate::transliterations as trans; +use crate::transliterations::{self as trans, TASYDID}; impl CleanCharsExt for str { fn clean_chars(&self) -> Filter bool> { - self.chars().filter(|&c| !trans::map(c).is_empty()) + self.chars() + .filter(|&c| c == TASYDID || !trans::map(c).is_empty()) } } @@ -112,7 +113,7 @@ mod tests { assert_same_basmalah(SIMPLE_PLAIN); assert_eq!(iter_quran(SIMPLE_PLAIN).count(), AYA_COUNT); assert_eq!(count_unique_simple_clean_chars(), 37); - assert_eq!(count_unique_simple_plain_chars(), 37); + assert_eq!(count_unique_simple_plain_chars(), 38); } fn assert_same_basmalah(raw: &str) { @@ -160,7 +161,13 @@ mod tests { #[test] fn test_clean_chars() { for ((_, _, clean), (_, _, plain)) in iter_quran(SIMPLE_CLEAN).zip(iter()) { - assert_eq!(clean, plain.clean_chars().collect::()); + assert_eq!( + clean, + plain + .clean_chars() + .filter(|&c| c != TASYDID) + .collect::() + ); } } } diff --git a/src/transliterations.rs b/src/transliterations.rs index 8fb9666..f9af94e 100644 --- a/src/transliterations.rs +++ b/src/transliterations.rs @@ -1,3 +1,5 @@ +pub(crate) const TASYDID: char = '\u{0651}'; + pub(crate) fn map(c: char) -> &'static [&'static str] { match c { 'ء' => &["'", "k", "a", "i", "u"], @@ -43,7 +45,7 @@ pub(crate) fn map(c: char) -> &'static [&'static str] { 'ف' => &["f", "fa", "fi", "fu"], 'ق' => &["k", "ko", "ki", "ku", "q", "qo", "qi", "qu", "qa"], 'ك' => &["k", "ka", "ki", "ku"], - 'ل' => &["l", "ll", "lla", "la", "li", "lu"], + 'ل' => &["l", "la", "li", "lu"], 'م' => &["m", "ma", "mi", "mu"], 'ن' => &["n", "na", "ni", "nu"], 'ه' => &["h", "ha", "hi", "hu"], @@ -57,9 +59,12 @@ pub(crate) fn map(c: char) -> &'static [&'static str] { pub(crate) fn contextual_map(prev_c: char, c: char) -> &'static [&'static str] { match (prev_c, c) { - (' ', 'ا') | ('ب', 'ا') | ('ا', 'ل') | ('و', 'ا') | ('أ', 'و') => &[""], + (' ', 'ا') | ('ب', 'ا') | ('ا', 'ل') | ('آ', 'ل') | ('و', 'ا') | ('أ', 'و') => { + &[""] + } ('\0', 'ا') => &["i", "u"], ('ل', 'ل') => &["i"], + (_, TASYDID) => map(prev_c), _ => &[], } }