Skip to content

Commit

Permalink
Implement L1 rule
Browse files Browse the repository at this point in the history
To be able to implement L1, we need access to more information from
`BidiInfo`, namely `original_classes` of the `text`, in `visual_runs()`,
which would mean it should pass through `reorder_line()`.

The fact that information from `BidiInfo` is needed for both steps of
the public API (generating `BidiInfo` and consuming it
per-paragraph/per-level) made me change the API design and move these
methods into `impl BidiInfo`.

Then, since we needed access to `text` for every `BidiInfo` consumption,
I added a reference to `text` to `BidiInfo`, which also enables more
compile-time checks for `BidiInfo` isntance not outliving the text in
the user code.

NOTE: We are already breaking API in version 0.3.0 and paving for full
spec support is a good reason to do so, IMHO.

The L1 rule works by one pass on the text of the line.

Conformance Test: this implementation reduces the number of failures
from 60494 to 23770 (out of total 256747 cases).

Fix #2
  • Loading branch information
behnam committed May 12, 2017
1 parent bd2efc8 commit 9907c1d
Show file tree
Hide file tree
Showing 8 changed files with 464 additions and 361 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
This software was written by the following people:

Matt Brubeck <[email protected]>
Behnam Esfahbod <[email protected]>
9 changes: 8 additions & 1 deletion src/char_data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ pub fn bidi_class(c: char) -> BidiClass {
bsearch_range_value_table(c, bidi_class_table)
}

pub fn is_rtl(bidi_class: BidiClass) -> bool {
match bidi_class {
RLE | RLO | RLI => true,
_ => false,
}
}

fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
match r.binary_search_by(
|&(lo, hi, _)| if lo <= c && c <= hi {
Expand All @@ -45,7 +52,7 @@ fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) ->
}

#[cfg(test)]
mod test {
mod tests {
use super::*;

#[test]
Expand Down
27 changes: 11 additions & 16 deletions src/explicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,21 @@
//!
//! http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions
use super::char_data::BidiClass;
use super::char_data::{BidiClass, is_rtl};
use super::level::Level;

use BidiClass::*;

/// Compute explicit embedding levels for one paragraph of text (X1-X8).
///
/// `classes[i]` must contain the BidiClass of the char at byte index `i`,
/// `processing_classes[i]` must contain the BidiClass of the char at byte index `i`,
/// for each char in `text`.
pub fn compute(
text: &str,
para_level: Level,
initial_classes: &[BidiClass],
levels: &mut [Level],
classes: &mut [BidiClass],
processing_classes: &mut [BidiClass],
) {
assert!(text.len() == initial_classes.len());

Expand All @@ -41,13 +41,8 @@ pub fn compute(
match initial_classes[i] {
// Rules X2-X5c
RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
let char_is_rtl = match initial_classes[i] {
RLE | RLO | RLI => true,
_ => false,
};

let last_level = stack.last().level;
let new_level = if char_is_rtl {
let new_level = if is_rtl(initial_classes[i]) {
last_level.new_explicit_next_rtl()
} else {
last_level.new_explicit_next_ltr()
Expand All @@ -58,8 +53,8 @@ pub fn compute(
if is_isolate {
levels[i] = last_level;
match stack.last().status {
OverrideStatus::RTL => classes[i] = R,
OverrideStatus::LTR => classes[i] = L,
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
Expand Down Expand Up @@ -108,8 +103,8 @@ pub fn compute(
let last = stack.last();
levels[i] = last.level;
match last.status {
OverrideStatus::RTL => classes[i] = R,
OverrideStatus::LTR => classes[i] = L,
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
Expand All @@ -135,16 +130,16 @@ pub fn compute(
let last = stack.last();
levels[i] = last.level;
match last.status {
OverrideStatus::RTL => classes[i] = R,
OverrideStatus::LTR => classes[i] = L,
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
}
// Handle multi-byte characters.
for j in 1..c.len_utf8() {
levels[i + j] = levels[i];
classes[i + j] = classes[i];
processing_classes[i + j] = processing_classes[i];
}
}
}
Expand Down
44 changes: 22 additions & 22 deletions src/implicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use BidiClass::*;
/// 3.3.4 Resolving Weak Types
///
/// http://www.unicode.org/reports/tr9/#Resolving_Weak_Types
pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass]) {
pub fn resolve_weak(sequence: &IsolatingRunSequence, processing_classes: &mut [BidiClass]) {
// FIXME (#8): This function applies steps W1-W6 in a single pass. This can produce
// incorrect results in cases where a "later" rule changes the value of `prev_class` seen
// by an "earlier" rule. We should either split this into separate passes, or preserve
Expand All @@ -41,38 +41,38 @@ pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass])
.flat_map(id as fn(LevelRun) -> LevelRun);

while let Some(i) = indices.next() {
match classes[i] {
match processing_classes[i] {
// http://www.unicode.org/reports/tr9/#W1
NSM => {
classes[i] = match prev_class {
processing_classes[i] = match prev_class {
RLI | LRI | FSI | PDI => ON,
_ => prev_class,
};
}
EN => {
if last_strong_is_al {
// W2. If previous strong char was AL, change EN to AN.
classes[i] = AN;
processing_classes[i] = AN;
} else {
// W5. If a run of ETs is adjacent to an EN, change the ETs to EN.
for j in &et_run_indices {
classes[*j] = EN;
processing_classes[*j] = EN;
}
et_run_indices.clear();
}
}
// http://www.unicode.org/reports/tr9/#W3
AL => classes[i] = R,
AL => processing_classes[i] = R,

// http://www.unicode.org/reports/tr9/#W4
ES | CS => {
let next_class = indices
.clone()
.map(|j| classes[j])
.map(|j| processing_classes[j])
.filter(not_removed_by_x9)
.next()
.unwrap_or(sequence.eos);
classes[i] = match (prev_class, classes[i], next_class) {
processing_classes[i] = match (prev_class, processing_classes[i], next_class) {
(EN, ES, EN) | (EN, CS, EN) => EN,
(AN, CS, AN) => AN,
(_, _, _) => ON,
Expand All @@ -81,7 +81,7 @@ pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass])
// http://www.unicode.org/reports/tr9/#W5
ET => {
match prev_class {
EN => classes[i] = EN,
EN => processing_classes[i] = EN,
_ => et_run_indices.push(i), // In case this is followed by an EN.
}
}
Expand All @@ -92,7 +92,7 @@ pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass])
}
}

prev_class = classes[i];
prev_class = processing_classes[i];
match prev_class {
L | R => {
last_strong_is_al = false;
Expand All @@ -105,7 +105,7 @@ pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass])
if prev_class != ET {
// W6. If we didn't find an adjacent EN, turn any ETs into ON instead.
for j in &et_run_indices {
classes[*j] = ON;
processing_classes[*j] = ON;
}
et_run_indices.clear();
}
Expand All @@ -115,9 +115,9 @@ pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass])
let mut last_strong_is_l = sequence.sos == L;
for run in &sequence.runs {
for i in run.clone() {
match classes[i] {
match processing_classes[i] {
EN if last_strong_is_l => {
classes[i] = L;
processing_classes[i] = L;
}
L => {
last_strong_is_l = true;
Expand All @@ -137,7 +137,7 @@ pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass])
pub fn resolve_neutral(
sequence: &IsolatingRunSequence,
levels: &[Level],
classes: &mut [BidiClass],
processing_classes: &mut [BidiClass],
) {
let mut indices = sequence.runs.iter().flat_map(Clone::clone);
let mut prev_class = sequence.sos;
Expand All @@ -154,18 +154,18 @@ pub fn resolve_neutral(

// Process sequences of NI characters.
let mut ni_run = Vec::new();
if ni(classes[i]) {
if ni(processing_classes[i]) {
// Consume a run of consecutive NI characters.
ni_run.push(i);
let mut next_class;
loop {
match indices.next() {
Some(j) => {
i = j;
if removed_by_x9(classes[i]) {
if removed_by_x9(processing_classes[i]) {
continue;
}
next_class = classes[j];
next_class = processing_classes[j];
if ni(next_class) {
ni_run.push(i);
} else {
Expand All @@ -187,11 +187,11 @@ pub fn resolve_neutral(
(_, _) => levels[i].bidi_class(),
};
for j in &ni_run {
classes[*j] = new_class;
processing_classes[*j] = new_class;
}
ni_run.clear();
}
prev_class = classes[i];
prev_class = processing_classes[i];
}
}

Expand All @@ -200,12 +200,12 @@ pub fn resolve_neutral(
/// Returns the maximum embedding level in the paragraph.
///
/// http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels
pub fn resolve_levels(classes: &[BidiClass], levels: &mut [Level]) -> Level {
pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level {
let mut max_level = Level::ltr();

assert!(classes.len() == levels.len());
assert!(original_classes.len() == levels.len());
for i in 0..levels.len() {
match (levels[i].is_rtl(), classes[i]) {
match (levels[i].is_rtl(), original_classes[i]) {
// http://www.unicode.org/reports/tr9/#I1
(false, R) => levels[i].raise(1).expect("Level number error"),
(false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"),
Expand Down
2 changes: 1 addition & 1 deletion src/level.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl<'a> PartialEq<&'a str> for Level {
}

#[cfg(test)]
mod test {
mod tests {
use super::*;

#[test]
Expand Down
Loading

0 comments on commit 9907c1d

Please sign in to comment.