From 22a7db4e2e1baf0f23c0c0b525a2b65699e32e57 Mon Sep 17 00:00:00 2001 From: Thomas M Kehrenberg Date: Mon, 3 Feb 2025 14:21:56 +0100 Subject: [PATCH 1/3] First steps towards custom commands --- latex2mmlc/src/ast.rs | 8 ++++++++ latex2mmlc/src/commands.rs | 7 ++++--- latex2mmlc/src/ops.rs | 4 ++-- latex2mmlc/src/token.rs | 2 ++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/latex2mmlc/src/ast.rs b/latex2mmlc/src/ast.rs index 00d8ceb..1bbda12 100644 --- a/latex2mmlc/src/ast.rs +++ b/latex2mmlc/src/ast.rs @@ -97,6 +97,13 @@ pub enum Node<'arena> { tf: MathVariant, content: &'arena Node<'arena>, }, + PredefinedNode(&'static Node<'static>), +} + +impl PartialEq for &'static Node<'static> { + fn eq(&self, other: &&'static Node<'static>) -> bool { + std::ptr::eq(*self, *other) + } } const INDENT: &str = " "; @@ -516,6 +523,7 @@ impl MathMLEmitter { pushln!(&mut self.s, base_indent, ""); } Node::ColumnSeparator | Node::RowSeparator => (), + Node::PredefinedNode(node) => self.emit(node, base_indent), } } } diff --git a/latex2mmlc/src/commands.rs b/latex2mmlc/src/commands.rs index eacb0c2..6048f03 100644 --- a/latex2mmlc/src/commands.rs +++ b/latex2mmlc/src/commands.rs @@ -146,7 +146,7 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map! { "blacksquare" => Token::Letter(ops::BLACK_SQUARE), "bm" => Token::Transform(MathVariant::Transform(TextTransform::BoldItalic)), "boldsymbol" => Token::Transform(MathVariant::Transform(TextTransform::BoldItalic)), - "bot" => Token::Operator(ops::UP_TACK), + "bot" => Token::Letter(ops::UP_TACK), "botdoteq" => Token::Operator(ops::EQUALS_SIGN_WITH_DOT_BELOW), "boxbox" => Token::Operator(ops::SQUARED_SQUARE), "boxbslash" => Token::Operator(ops::SQUARED_FALLING_DIAGONAL_SLASH), @@ -195,6 +195,7 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map! 
{ "curlywedge" => Token::Operator(ops::CURLY_LOGICAL_AND), "curvearrowleft" => Token::Operator(ops::ANTICLOCKWISE_TOP_SEMICIRCLE_ARROW), "curvearrowright" => Token::Operator(ops::CLOCKWISE_TOP_SEMICIRCLE_ARROW), + "d" => Token::Letter('d'), "dag" => Token::Letter('†'), "dagger" => Token::Letter('†'), "daleth" => Token::Letter('ℸ'), @@ -448,7 +449,7 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map! { "overset" => Token::Overset, "parallel" => Token::Operator(ops::PARALLEL_TO), "partial" => Token::Letter(ops::PARTIAL_DIFFERENTIAL), - "perp" => Token::Operator(ops::UP_TACK), + "perp" => Token::Letter(ops::UP_TACK), "phi" => Token::Letter('ϕ'), "pi" => Token::Letter('π'), "pm" => Token::Operator(ops::PLUS_MINUS_SIGN), @@ -562,7 +563,7 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map! { "tilde" => Token::OverUnder(ops::TILDE, true, Some(OpAttr::StretchyFalse)), "times" => Token::Operator(ops::MULTIPLICATION_SIGN), "to" => Token::Operator(ops::RIGHTWARDS_ARROW), - "top" => Token::Operator(ops::DOWN_TACK), + "top" => Token::Letter(ops::DOWN_TACK), "triangle" => Token::Letter('△'), "triangledown" => Token::Operator(ops::WHITE_DOWN_POINTING_TRIANGLE), "triangleleft" => Token::Operator(ops::WHITE_LEFT_POINTING_TRIANGLE), diff --git a/latex2mmlc/src/ops.rs b/latex2mmlc/src/ops.rs index bafd536..6c7be31 100644 --- a/latex2mmlc/src/ops.rs +++ b/latex2mmlc/src/ops.rs @@ -399,8 +399,8 @@ pub(crate) const SQUARED_TIMES: Op = Op('⊠'); pub(crate) const SQUARED_DOT_OPERATOR: Op = Op('⊡'); pub(crate) const RIGHT_TACK: Op = Op('⊢'); pub(crate) const LEFT_TACK: Op = Op('⊣'); -pub(crate) const DOWN_TACK: Op = Op('⊤'); -pub(crate) const UP_TACK: Op = Op('⊥'); +pub(crate) const DOWN_TACK: char = '⊤'; +pub(crate) const UP_TACK: char = '⊥'; // pub(crate) const ASSERTION: Op = Op('⊦'); // pub(crate) const MODELS: Op = Op('⊧'); pub(crate) const TRUE: Op = Op('⊨'); diff --git a/latex2mmlc/src/token.rs b/latex2mmlc/src/token.rs index 95f9bb3..3c22023 100644 --- 
a/latex2mmlc/src/token.rs +++ b/latex2mmlc/src/token.rs @@ -2,6 +2,7 @@ use std::mem::discriminant; use strum_macros::AsRefStr; +use crate::ast::Node; use crate::attribute::{FracAttr, MathVariant, OpAttr, Size, Style, TextTransform}; use crate::ops::{Op, ParenOp}; @@ -94,6 +95,7 @@ pub enum Token<'source> { #[strum(serialize = r"\mathstrut")] Mathstrut, Style(Style), + PredefinedNode(&'static Node<'static>), UnknownCommand(&'source str), } From fce04b877161dcb0dc052cd0b1e3080fea0b2788 Mon Sep 17 00:00:00 2001 From: Thomas MK Date: Mon, 3 Feb 2025 15:49:34 +0100 Subject: [PATCH 2/3] Implement custom commands with 0 arguments --- Cargo.lock | 14 -------------- latex2mmlc/Cargo.toml | 3 --- latex2mmlc/src/arena.rs | 3 +++ latex2mmlc/src/ast.rs | 1 + latex2mmlc/src/commands.rs | 12 ++++++++++-- latex2mmlc/src/lib.rs | 4 ++++ latex2mmlc/src/ops.rs | 1 + latex2mmlc/src/parse.rs | 1 + .../snapshots/latex2mmlc__tests__RR_command.snap | 7 +++++++ .../snapshots/latex2mmlc__tests__d_command.snap | 7 +++++++ .../latex2mmlc__tests__d_command_nested.snap | 11 +++++++++++ latex2mmlc/tests/wiki_test.rs | 7 +++---- 12 files changed, 48 insertions(+), 23 deletions(-) create mode 100644 latex2mmlc/src/snapshots/latex2mmlc__tests__RR_command.snap create mode 100644 latex2mmlc/src/snapshots/latex2mmlc__tests__d_command.snap create mode 100644 latex2mmlc/src/snapshots/latex2mmlc__tests__d_command_nested.snap diff --git a/Cargo.lock b/Cargo.lock index 680d8c2..64d1ce6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,12 +211,9 @@ version = "0.1.0" dependencies = [ "bumpalo", "insta", - "lazy_static", - "no-panic", "phf", "regex", "serde", - "similar", "strum_macros", ] @@ -316,17 +313,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "no-panic" -version = "0.1.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f7da86466fe446079286ef4b2f6d789755b610a9d85da8477633f734d2697e8" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name 
= "once_cell" version = "1.19.0" diff --git a/latex2mmlc/Cargo.toml b/latex2mmlc/Cargo.toml index bec9662..6660dfa 100644 --- a/latex2mmlc/Cargo.toml +++ b/latex2mmlc/Cargo.toml @@ -17,13 +17,10 @@ categories = ["science"] [dependencies] bumpalo = "3.17.0" -no-panic = "0.1.33" phf = { version = "0.11.3", features = ["macros"] } strum_macros = "0.26.4" [dev-dependencies] insta = { version = "1.41.1", features = ["default", "ron"] } serde = { version = "1.0.217", features = ["derive"] } -lazy_static = "1.5.0" regex = "1.11.1" -similar = "2.7.0" diff --git a/latex2mmlc/src/arena.rs b/latex2mmlc/src/arena.rs index a5a17f2..8da3dfc 100644 --- a/latex2mmlc/src/arena.rs +++ b/latex2mmlc/src/arena.rs @@ -220,6 +220,9 @@ impl<'arena> NodeList<'arena> { } } +// NodeList is sync, because we don't allow mutation through shared references. +unsafe impl<'arena> Sync for NodeList<'arena> {} + #[cfg(test)] impl<'arena> Serialize for NodeList<'arena> { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> diff --git a/latex2mmlc/src/ast.rs b/latex2mmlc/src/ast.rs index 1bbda12..a0daad8 100644 --- a/latex2mmlc/src/ast.rs +++ b/latex2mmlc/src/ast.rs @@ -185,6 +185,7 @@ impl MathMLEmitter { | Node::ColumnSeparator | Node::RowSeparator | Node::TextTransform { .. } + | Node::PredefinedNode(_) ) { // Get the base indent out of the way. new_line_and_indent(&mut self.s, base_indent); diff --git a/latex2mmlc/src/commands.rs b/latex2mmlc/src/commands.rs index 6048f03..f31ab86 100644 --- a/latex2mmlc/src/commands.rs +++ b/latex2mmlc/src/commands.rs @@ -1,3 +1,4 @@ +use crate::ast::Node; use crate::attribute::{FracAttr, MathVariant, OpAttr, Size, Style, TextTransform}; use crate::ops::{self, Op}; use crate::token::Token; @@ -68,6 +69,10 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map!
{ "Pi" => Token::UprightLetter('Π'), "Pr" => Token::Function("Pr"), "Psi" => Token::UprightLetter('Ψ'), + "RR" => Token::PredefinedNode(&Node::TextTransform { + tf: MathVariant::Transform(TextTransform::DoubleStruck), + content: &Node::SingleLetterIdent('R', false), + }), "Re" => Token::Letter('ℜ'), "Rho" => Token::UprightLetter('Ρ'), "Rightarrow" => Token::Operator(ops::RIGHTWARDS_DOUBLE_ARROW), @@ -195,7 +200,10 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map! { "curlywedge" => Token::Operator(ops::CURLY_LOGICAL_AND), "curvearrowleft" => Token::Operator(ops::ANTICLOCKWISE_TOP_SEMICIRCLE_ARROW), "curvearrowright" => Token::Operator(ops::CLOCKWISE_TOP_SEMICIRCLE_ARROW), - "d" => Token::Letter('d'), + "d" => Token::PredefinedNode(&Node::TextTransform { + tf: MathVariant::Normal, + content: &Node::SingleLetterIdent('d', false), + }), "dag" => Token::Letter('†'), "dagger" => Token::Letter('†'), "daleth" => Token::Letter('ℸ'), @@ -449,7 +457,7 @@ static COMMANDS: phf::Map<&'static str, Token> = phf::phf_map! 
{ "overset" => Token::Overset, "parallel" => Token::Operator(ops::PARALLEL_TO), "partial" => Token::Letter(ops::PARTIAL_DIFFERENTIAL), - "perp" => Token::Letter(ops::UP_TACK), + "perp" => Token::Operator(ops::PERPENDICULAR), "phi" => Token::Letter('ϕ'), "pi" => Token::Letter('π'), "pm" => Token::Operator(ops::PLUS_MINUS_SIGN), diff --git a/latex2mmlc/src/lib.rs b/latex2mmlc/src/lib.rs index e021c5c..c69777b 100644 --- a/latex2mmlc/src/lib.rs +++ b/latex2mmlc/src/lib.rs @@ -57,6 +57,7 @@ pub(crate) mod lexer; pub(crate) mod ops; pub(crate) mod parse; pub mod token; + pub use ast::MathMLEmitter; pub use error::{LatexErrKind, LatexError}; @@ -313,6 +314,9 @@ mod tests { ), ("middle_bracket", r"\left(\frac12\middle]\frac12\right)"), ("left_right_different_stretch", r"\left/\frac12\right)"), + ("d_command", r"\d"), + ("d_command_nested", r"\mathit{x\d x}"), + ("RR_command", r"\RR"), ]; for (name, problem) in problems.into_iter() { diff --git a/latex2mmlc/src/ops.rs b/latex2mmlc/src/ops.rs index 6c7be31..c11c7d1 100644 --- a/latex2mmlc/src/ops.rs +++ b/latex2mmlc/src/ops.rs @@ -538,6 +538,7 @@ pub(crate) const BLACK_STAR: char = '★'; // // Unicode Block: Miscellaneous Mathematical Symbols-A // +pub(crate) const PERPENDICULAR: Op = Op('⊥'); pub(crate) const MATHEMATICAL_LEFT_WHITE_SQUARE_BRACKET: &ParenOp = &ParenOp('⟦', false, Stretchy::Always); pub(crate) const MATHEMATICAL_RIGHT_WHITE_SQUARE_BRACKET: &ParenOp = diff --git a/latex2mmlc/src/parse.rs b/latex2mmlc/src/parse.rs index d795432..7bd13bb 100644 --- a/latex2mmlc/src/parse.rs +++ b/latex2mmlc/src/parse.rs @@ -661,6 +661,7 @@ where Token::End | Token::Right | Token::GroupEnd => { return Err(LatexError(loc, LatexErrKind::UnexpectedClose(cur_token))) } + Token::PredefinedNode(node) => Node::PredefinedNode(node), }; Ok(self.commit(node)) } diff --git a/latex2mmlc/src/snapshots/latex2mmlc__tests__RR_command.snap b/latex2mmlc/src/snapshots/latex2mmlc__tests__RR_command.snap new file mode 100644 index 0000000..d6e0510 
--- /dev/null +++ b/latex2mmlc/src/snapshots/latex2mmlc__tests__RR_command.snap @@ -0,0 +1,7 @@ +--- +source: latex2mmlc/src/lib.rs +expression: "\\RR" +--- + + + diff --git a/latex2mmlc/src/snapshots/latex2mmlc__tests__d_command.snap b/latex2mmlc/src/snapshots/latex2mmlc__tests__d_command.snap new file mode 100644 index 0000000..a28c1dc --- /dev/null +++ b/latex2mmlc/src/snapshots/latex2mmlc__tests__d_command.snap @@ -0,0 +1,7 @@ +--- +source: latex2mmlc/src/lib.rs +expression: "\\d" +--- + + d + diff --git a/latex2mmlc/src/snapshots/latex2mmlc__tests__d_command_nested.snap b/latex2mmlc/src/snapshots/latex2mmlc__tests__d_command_nested.snap new file mode 100644 index 0000000..b1f51dc --- /dev/null +++ b/latex2mmlc/src/snapshots/latex2mmlc__tests__d_command_nested.snap @@ -0,0 +1,11 @@ +--- +source: latex2mmlc/src/lib.rs +expression: "\\mathit{x\\d x}" +--- + + + 𝑥 + d + 𝑥 + + diff --git a/latex2mmlc/tests/wiki_test.rs b/latex2mmlc/tests/wiki_test.rs index a6f02e5..5644d11 100644 --- a/latex2mmlc/tests/wiki_test.rs +++ b/latex2mmlc/tests/wiki_test.rs @@ -1,5 +1,6 @@ +use std::sync::LazyLock; + use insta::assert_snapshot; -use lazy_static::lazy_static; use regex::Regex; // use similar::{ChangeTag, TextDiff}; @@ -268,9 +269,7 @@ fn wiki_test() { /// Prettify HTML input pub fn prettify(input: &str) -> String { - lazy_static! 
{ - static ref OPEN_TAG: Regex = Regex::new("(?P<tag><[A-z])").unwrap(); - } + static OPEN_TAG: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?P<tag><[A-z])").unwrap()); // First get all tags on their own lines let mut stage1 = input.to_string(); From a7aa4571a78ab9df46f2bd8d5517949ea0f634d2 Mon Sep 17 00:00:00 2001 From: Thomas MK Date: Mon, 3 Feb 2025 15:53:52 +0100 Subject: [PATCH 3/3] Use correct character for perpendicular --- latex2mmlc/src/ops.rs | 2 +- latex2mmlc/tests/snapshots/wiki_test__wiki068.snap | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/latex2mmlc/src/ops.rs b/latex2mmlc/src/ops.rs index c11c7d1..f4f8541 100644 --- a/latex2mmlc/src/ops.rs +++ b/latex2mmlc/src/ops.rs @@ -538,7 +538,7 @@ pub(crate) const BLACK_STAR: char = '★'; // // Unicode Block: Miscellaneous Mathematical Symbols-A // -pub(crate) const PERPENDICULAR: Op = Op('⊥'); +pub(crate) const PERPENDICULAR: Op = Op('⟂'); pub(crate) const MATHEMATICAL_LEFT_WHITE_SQUARE_BRACKET: &ParenOp = &ParenOp('⟦', false, Stretchy::Always); pub(crate) const MATHEMATICAL_RIGHT_WHITE_SQUARE_BRACKET: &ParenOp = diff --git a/latex2mmlc/tests/snapshots/wiki_test__wiki068.snap b/latex2mmlc/tests/snapshots/wiki_test__wiki068.snap index 0a26c41..d97b0ce 100644 --- a/latex2mmlc/tests/snapshots/wiki_test__wiki068.snap +++ b/latex2mmlc/tests/snapshots/wiki_test__wiki068.snap @@ -3,7 +3,7 @@ source: latex2mmlc/tests/wiki_test.rs expression: "\\perp, \\angle, \\sphericalangle, \\measuredangle, 45^\\circ" --- - + , ,