Skip to content

Commit

Permalink
Optimize adjacency list data structure (#28)
Browse files Browse the repository at this point in the history
* minor: sort should use the unstable version instead

* use smaller data structure to store adjacency

* bump version

* remove unused lifetime annotations

* add unit test for List

* minor
  • Loading branch information
alpancs authored Mar 21, 2023
1 parent d7d1298 commit c252df2
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "quranize"
version = "0.10.1"
version = "0.10.2"
authors = ["Alfan Nur Fauzan <[email protected]>"]
edition = "2021"
description = "Encoding transliterations into Quran forms."
Expand Down
61 changes: 61 additions & 0 deletions src/collections/list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/// A minimal singly linked (cons) list.
///
/// Every `Cons` cell owns one element together with a boxed tail, and `Empty`
/// terminates the list. `Empty` is also the [`Default`] value.
#[derive(Default)]
pub enum List<T> {
    /// The list without any elements.
    #[default]
    Empty,
    /// One element followed by the rest of the list.
    Cons(T, Box<List<T>>),
}

/// Tears the list down iteratively.
///
/// The compiler-generated drop glue for `Cons(T, Box<List<T>>)` recurses once
/// per element, so dropping a very long list could overflow the stack. This
/// implementation unlinks one cell at a time instead, keeping the drop depth
/// constant regardless of the list length.
impl<T> Drop for List<T> {
    fn drop(&mut self) {
        if let List::Cons(_, tail) = self {
            // Detach everything behind the head; the head cell itself is then
            // dropped by the regular drop glue with an `Empty` tail.
            let mut rest = std::mem::take(&mut **tail);
            while let List::Cons(_, next) = &mut rest {
                let after = std::mem::take(&mut **next);
                // The previous `rest` now has an `Empty` tail, so dropping it
                // here frees exactly one cell without further recursion.
                rest = after;
            }
        }
    }
}

impl<T> List<T> {
    /// Prepends `e`, making it the new head of the list.
    pub fn push(&mut self, e: T) {
        // Swap the current contents out (leaving `Empty` behind) so that they
        // can be re-attached as the tail of the new head cell.
        let previous = std::mem::replace(self, List::Empty);
        *self = List::Cons(e, Box::new(previous));
    }

    /// Returns an iterator over references to the elements, starting at the
    /// head (the most recently pushed element).
    pub fn iter(&self) -> Iter<T> {
        Iter { list: self }
    }

    /// Counts the elements by walking the whole list (test builds only).
    #[cfg(test)]
    pub fn len(&self) -> usize {
        self.iter().fold(0, |seen, _| seen + 1)
    }

    /// Returns `true` when the list holds no elements (test builds only).
    #[cfg(test)]
    pub fn is_empty(&self) -> bool {
        match self {
            Self::Empty => true,
            Self::Cons(..) => false,
        }
    }
}

/// Borrowing iterator over the elements of a [`List`], created by [`List::iter`].
pub struct Iter<'a, T> {
    // The part of the list that has not been yielded yet.
    list: &'a List<T>,
}

impl<'a, T> Iterator for Iter<'a, T> {
    type Item = &'a T;

    /// Yields a reference to the current head and advances to its tail;
    /// returns `None` once the remaining list is not a `Cons` cell.
    fn next(&mut self) -> Option<Self::Item> {
        if let List::Cons(head, tail) = self.list {
            // `tail` is a `&Box<List<T>>`; it deref-coerces to `&List<T>`.
            self.list = tail;
            Some(head)
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh list is empty; pushed values are counted by `len` and come
    /// back from `iter` in reverse insertion order, followed by `None`.
    #[test]
    fn test_list() {
        let mut list = List::<u8>::default();
        assert!(list.is_empty());

        for value in [1, 2] {
            list.push(value);
        }
        assert_eq!(list.len(), 2);

        let mut remaining = list.iter();
        for expected in [Some(&2), Some(&1), None] {
            assert_eq!(remaining.next(), expected);
        }
    }
}
2 changes: 2 additions & 0 deletions src/collections/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mod list;
pub use list::List;
29 changes: 17 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,22 @@
//! assert_eq!(aya_getter.get(1, 1), Some("بِسْمِ اللَّهِ الرَّحْمَـٰنِ الرَّحِيمِ"));
//! ```
mod normalization;
mod quran;
mod transliterations;
mod word_utils;

use std::{collections::HashMap, str::Chars};

mod collections;
use collections::List;

mod normalization;
use normalization::{normalize, normalize_first_aya};

mod quran;
pub use quran::AyaGetter;
use quran::CleanCharsExt;

mod transliterations;
use transliterations as trans;

mod word_utils;
use word_utils::WordSuffixIterExt;

type EncodeResults<'a> = Vec<(String, Vec<&'a str>, usize)>;
Expand All @@ -43,7 +48,7 @@ type NodeIndex = usize;

/// Struct to encode alphabetic text to quran text.
pub struct Quranize {
adjacencies: Vec<Vec<NodeIndex>>,
adjacencies: Vec<List<NodeIndex>>,
harfs: Vec<char>,
locations_index: HashMap<NodeIndex, Vec<Location>>,
node_id: NodeIndex,
Expand Down Expand Up @@ -80,8 +85,8 @@ impl Quranize {
/// ```
pub fn new(min_harfs: usize) -> Self {
let mut quranize = Self {
adjacencies: vec![vec![]],
harfs: vec![0 as char],
adjacencies: vec![Default::default()],
harfs: vec![Default::default()],
locations_index: Default::default(),
node_id: 0,
};
Expand Down Expand Up @@ -113,7 +118,7 @@ impl Quranize {
Some(&j) => j,
None => {
self.node_id += 1;
self.adjacencies.push(vec![]);
self.adjacencies.push(Default::default());
self.harfs.push(harf);
self.adjacencies[i].push(self.node_id);
self.node_id
Expand All @@ -125,7 +130,7 @@ impl Quranize {
pub fn encode(&self, text: &str) -> EncodeResults {
let mut results = self.rev_encode(0, &normalize(text));
results.append(&mut self.rev_encode_first_aya(0, &normalize_first_aya(text)));
results.sort();
results.sort_unstable_by(|(q1, _, _), (q2, _, _)| q1.cmp(q2));
results.dedup_by(|(q1, _, _), (q2, _, _)| q1 == q2);
for (q, e, _) in results.iter_mut() {
*q = q.chars().rev().collect();
Expand All @@ -141,7 +146,7 @@ impl Quranize {
results.push((String::new(), Vec::new(), locations.len()));
}
}
for &j in &self.adjacencies[i] {
for &j in self.adjacencies[i].iter() {
let prefixes = trans::map(self.harfs[j])
.iter()
.chain(trans::contextual_map(self.harfs[i], self.harfs[j]));
Expand All @@ -168,7 +173,7 @@ impl Quranize {
if text.is_empty() && self.containing_first_aya(i) {
results.push((String::new(), Vec::new(), self.locations_index[&i].len()));
}
for &j in &self.adjacencies[i] {
for &j in self.adjacencies[i].iter() {
for prefix in trans::single_harf_map(self.harfs[j]) {
if let Some(subtext) = text.strip_prefix(prefix) {
results.append(&mut self.rev_encode_sub_fa(j, subtext, prefix));
Expand Down

0 comments on commit c252df2

Please sign in to comment.