Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Conformance Test and implement L1 rule #30

Merged
merged 5 commits into from
May 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
This software was written by the following people:

Matt Brubeck <[email protected]>
Behnam Esfahbod <[email protected]>
12 changes: 9 additions & 3 deletions src/char_data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

//! Accessor for `Bidi_Class` property from Unicode Character Database (UCD)

// TODO: Make private after dropping deprecated call
pub mod tables;
mod tables;

pub use self::tables::{BidiClass, UNICODE_VERSION};

Expand All @@ -25,6 +24,13 @@ pub fn bidi_class(c: char) -> BidiClass {
bsearch_range_value_table(c, bidi_class_table)
}

/// Reports whether `bidi_class` is one of the right-to-left explicit
/// formatting classes: `RLE`, `RLO` or `RLI`.
///
/// Every other class yields `false`.
pub fn is_rtl(bidi_class: BidiClass) -> bool {
    matches!(bidi_class, RLE | RLO | RLI)
}

fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
match r.binary_search_by(
|&(lo, hi, _)| if lo <= c && c <= hi {
Expand All @@ -46,7 +52,7 @@ fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) ->
}

#[cfg(test)]
mod test {
mod tests {
use super::*;

#[test]
Expand Down
68 changes: 24 additions & 44 deletions src/explicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,21 @@
//!
//! http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions

use super::BidiClass;
use super::BidiClass::*;
use super::char_data::{BidiClass, is_rtl};
use super::level::Level;

use BidiClass::*;

/// Compute explicit embedding levels for one paragraph of text (X1-X8).
///
/// `classes[i]` must contain the BidiClass of the char at byte index `i`,
/// `processing_classes[i]` must contain the BidiClass of the char at byte index `i`,
/// for each char in `text`.
pub fn compute(
text: &str,
para_level: u8,
para_level: Level,
initial_classes: &[BidiClass],
levels: &mut [u8],
classes: &mut [BidiClass],
levels: &mut [Level],
processing_classes: &mut [BidiClass],
) {
assert!(text.len() == initial_classes.len());

Expand All @@ -39,30 +41,27 @@ pub fn compute(
match initial_classes[i] {
// Rules X2-X5c
RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
let is_rtl = match initial_classes[i] {
RLE | RLO | RLI => true,
_ => false,
};

let last_level = stack.last().level;
let new_level = match is_rtl {
true => next_rtl_level(last_level),
false => next_ltr_level(last_level),
let new_level = if is_rtl(initial_classes[i]) {
last_level.new_explicit_next_rtl()
} else {
last_level.new_explicit_next_ltr()
};

// X5a-X5c: Isolate initiators get the level of the last entry on the stack.
let is_isolate = matches!(initial_classes[i], RLI | LRI | FSI);
if is_isolate {
levels[i] = last_level;
match stack.last().status {
OverrideStatus::RTL => classes[i] = R,
OverrideStatus::LTR => classes[i] = L,
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}

if valid(new_level) && overflow_isolate_count == 0 &&
if new_level.is_ok() && overflow_isolate_count == 0 &&
overflow_embedding_count == 0 {
let new_level = new_level.unwrap();
stack.push(
new_level,
match initial_classes[i] {
Expand Down Expand Up @@ -104,8 +103,8 @@ pub fn compute(
let last = stack.last();
levels[i] = last.level;
match last.status {
OverrideStatus::RTL => classes[i] = R,
OverrideStatus::LTR => classes[i] = L,
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
Expand All @@ -131,42 +130,23 @@ pub fn compute(
let last = stack.last();
levels[i] = last.level;
match last.status {
OverrideStatus::RTL => classes[i] = R,
OverrideStatus::LTR => classes[i] = L,
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
}
// Handle multi-byte characters.
for j in 1..c.len_utf8() {
levels[i + j] = levels[i];
classes[i + j] = classes[i];
processing_classes[i + j] = processing_classes[i];
}
}
}

/// Upper bound on the depth of the directional status stack.
pub const MAX_DEPTH: u8 = 125;

/// Returns `true` when `level` lies in the inclusive range `0..=MAX_DEPTH`,
/// i.e. the levels that are accepted at this stage.
/// http://www.unicode.org/reports/tr9/#X1
fn valid(level: u8) -> bool {
    (0..=MAX_DEPTH).contains(&level)
}

/// Smallest odd level strictly greater than `level`.
fn next_rtl_level(level: u8) -> u8 {
    if level % 2 == 0 {
        // Even input: the very next value is already odd.
        level + 1
    } else {
        // Odd input: skip over the even value in between.
        level + 2
    }
}

/// Smallest even level strictly greater than `level`.
fn next_ltr_level(level: u8) -> u8 {
    if level % 2 == 0 {
        // Even input: the next even value is two steps away.
        level + 2
    } else {
        // Odd input: the very next value is already even.
        level + 1
    }
}

/// Entries in the directional status stack:
struct Status {
level: u8,
level: Level,
status: OverrideStatus,
}

Expand All @@ -184,9 +164,9 @@ struct DirectionalStatusStack {

impl DirectionalStatusStack {
fn new() -> Self {
DirectionalStatusStack { vec: Vec::with_capacity(MAX_DEPTH as usize + 2) }
DirectionalStatusStack { vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2) }
}
fn push(&mut self, level: u8, status: OverrideStatus) {
fn push(&mut self, level: Level, status: OverrideStatus) {
self.vec
.push(
Status {
Expand Down
42 changes: 42 additions & 0 deletions src/format_chars.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2014 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Directional Formatting Characters
//!
//! http://www.unicode.org/reports/tr9/#Directional_Formatting_Characters

// == Implicit ==
/// ARABIC LETTER MARK (U+061C)
pub const ALM: char = '\u{061C}';
/// LEFT-TO-RIGHT MARK (U+200E)
pub const LRM: char = '\u{200E}';
/// RIGHT-TO-LEFT MARK (U+200F)
pub const RLM: char = '\u{200F}';

// == Explicit Isolates ==
/// LEFT-TO-RIGHT ISOLATE (U+2066)
pub const LRI: char = '\u{2066}';
/// RIGHT-TO-LEFT ISOLATE (U+2067)
pub const RLI: char = '\u{2067}';
/// FIRST STRONG ISOLATE (U+2068)
pub const FSI: char = '\u{2068}';
/// POP DIRECTIONAL ISOLATE (U+2069)
pub const PDI: char = '\u{2069}';

// == Explicit Embeddings and Overrides ==
/// LEFT-TO-RIGHT EMBEDDING (U+202A)
pub const LRE: char = '\u{202A}';
/// RIGHT-TO-LEFT EMBEDDING (U+202B)
pub const RLE: char = '\u{202B}';
/// LEFT-TO-RIGHT OVERRIDE (U+202D)
pub const LRO: char = '\u{202D}';
/// RIGHT-TO-LEFT OVERRIDE (U+202E)
pub const RLO: char = '\u{202E}';
/// POP DIRECTIONAL FORMATTING (U+202C)
pub const PDF: char = '\u{202C}';
Loading