Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle case "laa yukallifullah" #31

Merged
merged 4 commits into from
Apr 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "quranize"
version = "0.10.4"
version = "0.10.5"
authors = ["Alfan Nur Fauzan <[email protected]>"]
edition = "2021"
description = "Encoding transliterations into Quran forms."
Expand Down
31 changes: 18 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
//!
//! ```
//! let q = quranize::Quranize::default();
//! assert_eq!(q.encode("alhamdulillah").first().unwrap().0, "الحمد لله");
//! assert_eq!(q.encode("alhamdulillah").first().unwrap().0, "الحمد للّه");
//! ```
//!
//! ## Getting an aya text given surah number and ayah number
Expand Down Expand Up @@ -60,7 +60,7 @@ impl Default for Quranize {
///
/// ```
/// let q = quranize::Quranize::default(); // the same as `Quranize::new(usize::MAX)`
/// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء الله");
/// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء اللّه");
/// ```
fn default() -> Self {
Self::new(usize::MAX)
Expand All @@ -77,7 +77,7 @@ impl Quranize {
///
/// ```
/// let q = quranize::Quranize::new(35);
/// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء الله");
/// assert_eq!(q.encode("masyaallah").first().unwrap().0, "ما شاء اللّه");
/// let q = quranize::Quranize::new(1);
/// assert_eq!(q.encode("masyaallah").first(), None);
/// ```
Expand Down Expand Up @@ -215,11 +215,16 @@ impl Quranize {

#[cfg(test)]
mod tests {
use crate::transliterations::TASYDID;

use super::*;

impl Quranize {
/// Test-only shorthand around `encode`: returns just the first element of every
/// `(q, _, _)` result tuple, collected into a `Vec<String>`.
// NOTE(review): this span comes from a diff hunk and shows two alternative bodies.
// The single-line chain returns `q` unchanged; the multi-line chain additionally
// removes every `TASYDID` char from `q` before collecting.
fn e(&self, text: &str) -> Vec<String> {
self.encode(text).into_iter().map(|(q, _, _)| q).collect()
self.encode(text)
.into_iter()
.map(|(q, _, _)| q.chars().filter(|&c| c != TASYDID).collect())
.collect()
}
}

Expand All @@ -235,7 +240,7 @@ mod tests {
assert_eq!(q.e("birobbinnas"), vec!["برب الناس"]);
assert_eq!(q.e("inna anzalnahu"), vec!["إنا أنزلناه"]);
assert_eq!(q.e("wa'tasimu"), vec!["واعتصموا"]);
assert_eq!(q.e("wabarro"), vec!["وبرا", "وبئر"]);
assert_eq!(q.e("wabarro"), vec!["وبرا"]);
assert_eq!(q.e("idza qodho"), vec!["إذا قضى"]);
assert_eq!(q.e("masyaallah"), vec!["ما شاء الله"]);
assert_eq!(q.e("illa man taaba"), vec!["إلا من تاب"]);
Expand All @@ -248,6 +253,7 @@ mod tests {
assert_eq!(q.e("undur kaifa"), vec!["انظر كيف"]);
assert_eq!(q.e("lirrohman"), vec!["للرحمن"]);
assert_eq!(q.e("wantum muslimun"), vec!["وأنتم مسلمون"]);
assert_eq!(q.e("laa yukallifullah"), vec!["لا يكلف الله"]);

assert_eq!(
q.e("bismillahirrohmanirrohiim"),
Expand Down Expand Up @@ -279,34 +285,33 @@ mod tests {
assert_eq!(q.e("kaaaf haa yaa aiiin shoood"), vec!["كهيعص"]);
assert_eq!(q.e("kaf ha ya 'ain shod"), vec!["كهيعص"]);

assert_eq!(q.locations_index.len(), 685_770);
assert_eq!(q.locations_index.len(), 686_059);
}

#[test]
// Exercises the second (`.1`) and third (`.2`) fields of the tuples returned by
// `encode`, using an instance built with `Quranize::new(23)`.
// NOTE(review): this span comes from a diff hunk, so several assertions appear twice
// with different expected values (8 vs 9, 6 vs 7, and two `vec![...]` lines inside one
// `assert_eq!`). In each pair the first line is presumably the pre-change expectation
// and the second the post-change one — confirm against the merged file.
fn test_quranize_misc() {
let q = Quranize::new(23);
assert_eq!(q.encode("bismillah")[0].1.len(), 8);
assert_eq!(q.encode("bismillah")[0].1.len(), 9);
assert_eq!(q.encode("bismillah")[0].2, 3);
assert_eq!(q.encode("arrohman").len(), 1);
assert_eq!(q.encode("arrohman")[0].1.len(), 6);
assert_eq!(q.encode("arrohman")[0].1.len(), 7);
// `.1` is compared against a list of fragments of the input string.
assert_eq!(q.encode("alhamdu")[0].1, vec!["al", "ha", "m", "du"]);
assert_eq!(
q.encode("arrohman")[0].1,
vec!["a", "", "ro", "h", "ma", "n"]
vec!["a", "", "r", "ro", "h", "ma", "n"]
);
let result = &q.encode("masyaallah")[0];
// The fragment list is expected to have exactly one entry per char of the encoded text.
assert_eq!(result.0.chars().count(), result.1.len());
assert_eq!(
result.1,
vec!["m", "a", "", "sy", "a", "a", "", "", "l", "la", "h"]
vec!["m", "a", "", "sy", "a", "a", "", "", "", "l", "la", "h"]
);
}

#[test]
// Inputs for which `encode` (on `Quranize::new(23)`) is expected to return an empty result.
// NOTE(review): the enclosing hunk header (-279,34 +285,33) shrinks by one line, so one
// statement in this hunk was deleted without a replacement — presumably the "aaa" case
// below; confirm against the merged file.
fn test_quranize_empty_result() {
let q = Quranize::new(23);
assert!(q.encode("").is_empty());
assert!(q.encode("aaa").is_empty());
assert!(q.encode("abcd").is_empty());
assert!(q.encode("1+2=3").is_empty());
}
Expand All @@ -327,8 +332,8 @@ mod tests {
fn test_locate() {
let q = Quranize::new(23);
assert_eq!(q.get_locations("بسم").first(), Some(&(1, 1, 1)));
assert_eq!(q.get_locations("والناس").last(), Some(&(114, 6, 3)));
assert_eq!(q.get_locations("بسم الله الرحمن الرحيم").len(), 2);
assert_eq!(q.get_locations("والنّاس").last(), Some(&(114, 6, 3)));
assert_eq!(q.get_locations("بسم اللّه الرّحمن الرّحيم").len(), 2);
assert_eq!(q.get_locations("ن").first(), Some(&(68, 1, 1)));
assert!(q.get_locations("").is_empty());
assert!(q.get_locations("نن").is_empty());
Expand Down
9 changes: 3 additions & 6 deletions src/normalization.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
/// Strips `text` down to lowercase ASCII letters and apostrophes.
///
/// Every char is ASCII-lowercased; only `a`..=`z`, `'` and space pass the first
/// filter, and spaces are then removed from the returned `String`.
// NOTE(review): this span comes from a diff hunk (`@@ -1,11 +1,8 @@`) and shows removed
// and added lines together. The two `let` lines are alternatives; the `dedup_by` call,
// which collapses adjacent equal chars except `l`, `a`, `o`, `i` and `u`, needs the
// `let mut` binding and so presumably belongs to the removed variant.
pub(crate) fn normalize(text: &str) -> String {
let mut chars = Vec::from_iter(text.chars().filter_map(|c| match c.to_ascii_lowercase() {
let chars = Vec::from_iter(text.chars().filter_map(|c| match c.to_ascii_lowercase() {
c @ ('a'..='z' | '\'' | ' ') => Some(c),
_ => None,
}));
chars.dedup_by(|&mut a, &mut b| {
a == b && !(a == 'l' || a == 'a' || a == 'o' || a == 'i' || a == 'u')
});
chars.into_iter().filter(|&c| c != ' ').collect()
}

Expand All @@ -28,8 +25,8 @@ mod tests {
assert_eq!(normalize("'aalimul ghoibi"), "'aalimulghoibi");
assert_eq!(normalize("Qul A'udzu"), "qula'udzu");
assert_eq!(
normalize("bismilla hirrohman nirrohiim"),
"bismillahirohmannirohiim"
normalize("bismilla hirrohma nirrohiim"),
"bismillahirrohmanirrohiim"
);
}

Expand Down
17 changes: 12 additions & 5 deletions src/quran/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use simple_plain::RAW_QURAN as SIMPLE_PLAIN;
const SURA_COUNT: usize = 114;
const AYA_COUNT: usize = 6236;

/// Accept raw Quran string, return an iterator for each ayah in the Quran with surah number and ayah number.
/// Returns an iterator of `(sura_number, aya_number, aya_text)` that iterates each ayah in the Quran.
///
/// Takes no arguments: it forwards to `iter_quran` with the `SIMPLE_PLAIN` text, and the
/// yielded items are `(u8, u16, &'static str)` tuples.
pub(crate) fn iter() -> impl Iterator<Item = (u8, u16, &'static str)> {
iter_quran(SIMPLE_PLAIN)
}
Expand All @@ -33,10 +33,11 @@ pub(crate) trait CleanCharsExt {
fn clean_chars(&self) -> Filter<Chars, fn(&char) -> bool>;
}

use crate::transliterations as trans;
use crate::transliterations::{self as trans, TASYDID};
impl CleanCharsExt for str {
/// Returns a filtered iterator over the chars of `self`.
// NOTE(review): this span comes from a diff hunk and shows two alternative bodies.
// The one-line form keeps only chars for which `trans::map` returns a non-empty slice;
// the two-line form also keeps `TASYDID`.
fn clean_chars(&self) -> Filter<Chars, fn(&char) -> bool> {
self.chars().filter(|&c| !trans::map(c).is_empty())
self.chars()
.filter(|&c| c == TASYDID || !trans::map(c).is_empty())
}
}

Expand Down Expand Up @@ -112,7 +113,7 @@ mod tests {
assert_same_basmalah(SIMPLE_PLAIN);
assert_eq!(iter_quran(SIMPLE_PLAIN).count(), AYA_COUNT);
assert_eq!(count_unique_simple_clean_chars(), 37);
assert_eq!(count_unique_simple_plain_chars(), 37);
assert_eq!(count_unique_simple_plain_chars(), 38);
}

fn assert_same_basmalah(raw: &str) {
Expand Down Expand Up @@ -160,7 +161,13 @@ mod tests {
#[test]
// Walks `iter_quran(SIMPLE_CLEAN)` and `iter()` in lockstep and compares each clean
// text with the `clean_chars()` output of the matching plain text.
// NOTE(review): this span comes from a diff hunk; the single-line `assert_eq!` and the
// multi-line one are the before/after forms of the same check. The multi-line form
// filters `TASYDID` out of the `clean_chars()` output before comparing.
fn test_clean_chars() {
for ((_, _, clean), (_, _, plain)) in iter_quran(SIMPLE_CLEAN).zip(iter()) {
assert_eq!(clean, plain.clean_chars().collect::<String>());
assert_eq!(
clean,
plain
.clean_chars()
.filter(|&c| c != TASYDID)
.collect::<String>()
);
}
}
}
9 changes: 7 additions & 2 deletions src/transliterations.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/// The code point U+0651 (named ARABIC SHADDA in Unicode).
pub(crate) const TASYDID: char = '\u{0651}';

pub(crate) fn map(c: char) -> &'static [&'static str] {
match c {
'ء' => &["'", "k", "a", "i", "u"],
Expand Down Expand Up @@ -43,7 +45,7 @@ pub(crate) fn map(c: char) -> &'static [&'static str] {
'ف' => &["f", "fa", "fi", "fu"],
'ق' => &["k", "ko", "ki", "ku", "q", "qo", "qi", "qu", "qa"],
'ك' => &["k", "ka", "ki", "ku"],
'ل' => &["l", "ll", "lla", "la", "li", "lu"],
'ل' => &["l", "la", "li", "lu"],
'م' => &["m", "ma", "mi", "mu"],
'ن' => &["n", "na", "ni", "nu"],
'ه' => &["h", "ha", "hi", "hu"],
Expand All @@ -57,9 +59,12 @@ pub(crate) fn map(c: char) -> &'static [&'static str] {

/// Looks up candidate strings for `c` based on the pair `(prev_c, c)`.
///
/// Returns a static slice selected by matching on the pair. When `c` is `TASYDID` the
/// result is `map(prev_c)`; pairs without a dedicated arm yield an empty slice.
// NOTE(review): this span comes from a diff hunk. The first `=> &[""]` arm is presumably
// the pre-change line and the braced arm right after it its replacement, which lists the
// same pairs plus one additional pair.
pub(crate) fn contextual_map(prev_c: char, c: char) -> &'static [&'static str] {
match (prev_c, c) {
(' ', 'ا') | ('ب', 'ا') | ('ا', 'ل') | ('و', 'ا') | ('أ', 'و') => &[""],
(' ', 'ا') | ('ب', 'ا') | ('ا', 'ل') | ('آ', 'ل') | ('و', 'ا') | ('أ', 'و') => {
&[""]
}
('\0', 'ا') => &["i", "u"],
('ل', 'ل') => &["i"],
(_, TASYDID) => map(prev_c),
_ => &[],
}
}
Expand Down