From 49f0529aa596d2d02a7dc876bc8e9a156570162e Mon Sep 17 00:00:00 2001 From: Thomas M Kehrenberg Date: Mon, 12 Aug 2024 14:35:33 +0200 Subject: [PATCH 1/2] Make sure nodes are constructed directly on the heap --- latex2mmlc/src/ast.rs | 24 ++++-- latex2mmlc/src/lib.rs | 1 + latex2mmlc/src/parse.rs | 86 +++++++++++-------- ...tex2mmlc__tests__sum_pointless_limits.snap | 8 ++ 4 files changed, 78 insertions(+), 41 deletions(-) create mode 100644 latex2mmlc/src/snapshots/latex2mmlc__tests__sum_pointless_limits.snap diff --git a/latex2mmlc/src/ast.rs b/latex2mmlc/src/ast.rs index ac1f6ea..2f5a787 100644 --- a/latex2mmlc/src/ast.rs +++ b/latex2mmlc/src/ast.rs @@ -18,8 +18,14 @@ pub enum Node<'source> { }, MultiLetterIdent(StrReference), Space(&'static str), - Subscript(NodeReference, NodeReference), - Superscript(NodeReference, NodeReference), + Subscript { + target: NodeReference, + symbol: NodeReference, + }, + Superscript { + target: NodeReference, + symbol: NodeReference, + }, SubSup { target: NodeReference, sub: NodeReference, @@ -158,8 +164,14 @@ impl<'source> Node<'source> { } Node::Space(space) => push!(s, ""), // The following nodes have exactly two children. - node @ (Node::Subscript(first, second) - | Node::Superscript(first, second) + node @ (Node::Subscript { + symbol: second, + target: first, + } + | Node::Superscript { + symbol: second, + target: first, + } | Node::Overset { symbol: second, target: first, @@ -170,8 +182,8 @@ impl<'source> Node<'source> { } | Node::Root(second, first)) => { let (open, close) = match node { - Node::Subscript(_, _) => ("", ""), - Node::Superscript(_, _) => ("", ""), + Node::Subscript { .. } => ("", ""), + Node::Superscript { .. } => ("", ""), Node::Overset { .. } => ("", ""), Node::Underset { .. } => ("", ""), Node::Root(_, _) => ("", ""), diff --git a/latex2mmlc/src/lib.rs b/latex2mmlc/src/lib.rs index 3e83f6c..37d7798 100644 --- a/latex2mmlc/src/lib.rs +++ b/latex2mmlc/src/lib.rs @@ -203,6 +203,7 @@ mod tests { ("black_board_font", r"\mathbb{R}"), ("sum_with_special_symbol", r"\sum_{i = 0}^∞ i"), ("sum_with_limit", r"\sum\limits_{i=1}^N"), + ("sum_pointless_limits", r"\sum\limits n"), ("product", r"\prod_n n"), ("underscore", r"x\ y"), ("stretchy_brace", r"\left\{ x ( x + 2 ) \right\}"), diff --git a/latex2mmlc/src/parse.rs b/latex2mmlc/src/parse.rs index 1a178e1..7a0155c 100644 --- a/latex2mmlc/src/parse.rs +++ b/latex2mmlc/src/parse.rs @@ -68,29 +68,34 @@ impl<'source> Parser<'source> { &mut self, cur_tokloc: TokLoc<'source>, ) -> Result> { - let left = self.parse_single_node(cur_tokloc)?; + let target = self.parse_single_node(cur_tokloc)?; match self.get_bounds()? { - Bounds(Some(sub), Some(sup)) => Ok(self.commit_node(Node::SubSup { - target: left, - sub, - sup, - })), - Bounds(Some(symbol), None) => Ok(self.commit_node(Node::Subscript(left, symbol))), - Bounds(None, Some(symbol)) => Ok(self.commit_node(Node::Superscript(left, symbol))), - Bounds(None, None) => Ok(left), + Bounds(Some(sub), Some(sup)) => Ok(self.commit_node(Node::SubSup { target, sub, sup })), + Bounds(Some(symbol), None) => Ok(self.commit_node(Node::Subscript { target, symbol })), + Bounds(None, Some(symbol)) => { + Ok(self.commit_node(Node::Superscript { target, symbol })) + } + Bounds(None, None) => Ok(target), } } + /// Put the node onto the heap in the arena and return a reference to it. + /// + /// The advantage over using `Box` is that we can store the nodes in a contiguous + /// memory block, and release all of them at once when the arena is dropped. + /// + /// Ideally, the node is constructed directly on the heap, so try to avoid + /// constructing it on the stack and then moving it to the heap. fn commit_node(&mut self, node: Node<'source>) -> NodeReference { self.arena.push(node) } - // Read the node immediately after without worrying about whether - // the infix operator `_`, `^`, `'` will continue - // - // Note: Use `parse_node()` when reading nodes correctly in - // consideration of infix operators. + /// Read the node immediately after without worrying about whether + /// the infix operator `_`, `^`, `'` will continue + /// + /// Use this function only if you are sure that this is what you need. + /// Otherwise, use `parse_node` instead. fn parse_single_node( &mut self, cur_tokloc: TokLoc<'source>, @@ -146,11 +151,15 @@ impl<'source> Parser<'source> { let numerator = self.parse_token()?; let denominator = self.parse_token()?; if matches!(cur_token, Token::Binom(_)) { - let inner = Node::Frac(numerator, denominator, Some('0'), displaystyle); Node::Fenced { open: ops::LEFT_PARENTHESIS, close: ops::RIGHT_PARENTHESIS, - content: self.commit_node(inner), + content: self.commit_node(Node::Frac( + numerator, + denominator, + Some('0'), + displaystyle, + )), style: None, } } else { @@ -195,8 +204,8 @@ impl<'source> Parser<'source> { }; let numerator = self.parse_token()?; let denominator = self.parse_token()?; - let inner = Node::Frac(numerator, denominator, line_thickness, None); - let content = self.commit_node(inner); + let content = + self.commit_node(Node::Frac(numerator, denominator, line_thickness, None)); Node::Fenced { open, close, @@ -255,11 +264,10 @@ impl<'source> Parser<'source> { Token::BigOp(op) => { let target = if matches!(self.peek.token(), Token::Limits) { self.next_token(); // Discard the limits token. - Node::Operator(op, Some(OpAttr::NoMovableLimits)) + self.commit_node(Node::Operator(op, Some(OpAttr::NoMovableLimits))) } else { - Node::Operator(op, None) + self.commit_node(Node::Operator(op, None)) }; - let target = self.commit_node(target); match self.get_bounds()? { Bounds(Some(under), Some(over)) => Node::UnderOver { target, @@ -268,26 +276,30 @@ impl<'source> Parser<'source> { }, Bounds(Some(symbol), None) => Node::Underset { target, symbol }, Bounds(None, Some(symbol)) => Node::Overset { target, symbol }, - Bounds(None, None) => Node::Operator(op, None), + Bounds(None, None) => { + return Ok(target); + } } } Token::Lim(lim) => { - let lim = Node::MultiLetterIdent(self.buffer.push_str(lim)); + let lim_name = self.buffer.push_str(lim); + let lim = self.commit_node(Node::MultiLetterIdent(lim_name)); if matches!(self.peek.token(), Token::Underscore) { self.next_token(); // Discard the underscore token. let under = self.parse_single_token()?; Node::Underset { - target: self.commit_node(lim), + target: lim, symbol: under, } } else { - lim + return Ok(lim); } } Token::Slashed => { - self.next_token(); // Optimistically skip the next token. + // TODO: Actually check the braces. + self.next_token(); // Optimistically assume that the next token is `{` let node = self.parse_token()?; - self.next_token(); // Optimistically skip the next token. + self.next_token(); // Optimistically assume that the next token is `}` Node::Slashed(node) } Token::Not => { @@ -353,15 +365,19 @@ impl<'source> Parser<'source> { }, Bounds(Some(symbol), None) => Node::Underset { target, symbol }, Bounds(None, Some(symbol)) => Node::Overset { target, symbol }, - Bounds(None, None) => Node::Operator(int, None), + Bounds(None, None) => { + return Ok(target); + } } } else { let target = self.commit_node(Node::Operator(int, None)); match self.get_bounds()? { Bounds(Some(sub), Some(sup)) => Node::SubSup { target, sub, sup }, - Bounds(Some(symbol), None) => Node::Subscript(target, symbol), - Bounds(None, Some(symbol)) => Node::Superscript(target, symbol), - Bounds(None, None) => Node::Operator(int, None), + Bounds(Some(symbol), None) => Node::Subscript { target, symbol }, + Bounds(None, Some(symbol)) => Node::Superscript { target, symbol }, + Bounds(None, None) => { + return Ok(target); + } } } } @@ -484,17 +500,17 @@ impl<'source> Parser<'source> { let node = match env_name { "align" | "align*" | "aligned" => Node::Table(env_content, Align::Alternating), "cases" => { - let content = Node::Table(env_content, Align::Left); + let content = self.commit_node(Node::Table(env_content, Align::Left)); Node::Fenced { open: ops::LEFT_CURLY_BRACKET, close: ops::NULL, - content: self.commit_node(content), + content, style: None, } } "matrix" => Node::Table(env_content, Align::Center), matrix_variant @ ("pmatrix" | "bmatrix" | "vmatrix") => { - let content = Node::Table(env_content, Align::Center); + let content = self.commit_node(Node::Table(env_content, Align::Center)); let (open, close) = match matrix_variant { "pmatrix" => (ops::LEFT_PARENTHESIS, ops::RIGHT_PARENTHESIS), "bmatrix" => (ops::LEFT_SQUARE_BRACKET, ops::RIGHT_SQUARE_BRACKET), @@ -505,7 +521,7 @@ impl<'source> Parser<'source> { Node::Fenced { open, close, - content: self.commit_node(content), + content, style: None, } } diff --git a/latex2mmlc/src/snapshots/latex2mmlc__tests__sum_pointless_limits.snap b/latex2mmlc/src/snapshots/latex2mmlc__tests__sum_pointless_limits.snap new file mode 100644 index 0000000..6baca69 --- /dev/null +++ b/latex2mmlc/src/snapshots/latex2mmlc__tests__sum_pointless_limits.snap @@ -0,0 +1,8 @@ +--- +source: latex2mmlc/src/lib.rs +expression: "\\sum\\limits n" +--- + + + n + From b3f89fd17d10ec2da9e74177b0b3c4bb7565d1b6 Mon Sep 17 00:00:00 2001 From: Thomas M Kehrenberg Date: Mon, 12 Aug 2024 14:37:46 +0200 Subject: [PATCH 2/2] Rename `commit_node()` to just `commit()` --- latex2mmlc/src/parse.rs | 51 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/latex2mmlc/src/parse.rs b/latex2mmlc/src/parse.rs index 7a0155c..cb673e7 100644 --- a/latex2mmlc/src/parse.rs +++ b/latex2mmlc/src/parse.rs @@ -71,11 +71,9 @@ impl<'source> Parser<'source> { let target = self.parse_single_node(cur_tokloc)?; match self.get_bounds()? { - Bounds(Some(sub), Some(sup)) => Ok(self.commit_node(Node::SubSup { target, sub, sup })), - Bounds(Some(symbol), None) => Ok(self.commit_node(Node::Subscript { target, symbol })), - Bounds(None, Some(symbol)) => { - Ok(self.commit_node(Node::Superscript { target, symbol })) - } + Bounds(Some(sub), Some(sup)) => Ok(self.commit(Node::SubSup { target, sub, sup })), + Bounds(Some(symbol), None) => Ok(self.commit(Node::Subscript { target, symbol })), + Bounds(None, Some(symbol)) => Ok(self.commit(Node::Superscript { target, symbol })), Bounds(None, None) => Ok(target), } } @@ -87,7 +85,7 @@ impl<'source> Parser<'source> { /// /// Ideally, the node is constructed directly on the heap, so try to avoid /// constructing it on the stack and then moving it to the heap. - fn commit_node(&mut self, node: Node<'source>) -> NodeReference { + fn commit(&mut self, node: Node<'source>) -> NodeReference { self.arena.push(node) } @@ -111,8 +109,8 @@ impl<'source> Parser<'source> { Some(tf) => Node::MultiLetterIdent(self.buffer.transform_and_push(number, tf)), None => Node::Number(number), }; - let first = self.commit_node(num); - let second = self.commit_node(match tok { + let first = self.commit(num); + let second = self.commit(match tok { Token::NumberWithDot(_) => Node::SingleLetterIdent('.', None), Token::NumberWithComma(_) => Node::Operator(ops::COMMA, None), _ => unreachable!(), @@ -154,7 +152,7 @@ impl<'source> Parser<'source> { Node::Fenced { open: ops::LEFT_PARENTHESIS, close: ops::RIGHT_PARENTHESIS, - content: self.commit_node(Node::Frac( + content: self.commit(Node::Frac( numerator, denominator, Some('0'), @@ -204,8 +202,7 @@ impl<'source> Parser<'source> { }; let numerator = self.parse_token()?; let denominator = self.parse_token()?; - let content = - self.commit_node(Node::Frac(numerator, denominator, line_thickness, None)); + let content = self.commit(Node::Frac(numerator, denominator, line_thickness, None)); Node::Fenced { open, close, @@ -238,22 +235,22 @@ impl<'source> Parser<'source> { { self.next_token(); // Discard the circumflex or underscore token. let expl = self.parse_single_token()?; - let op = self.commit_node(Node::Operator(x, None)); + let op = self.commit(Node::Operator(x, None)); if is_over { - let symbol = self.commit_node(Node::Overset { + let symbol = self.commit(Node::Overset { symbol: expl, target: op, }); Node::Overset { symbol, target } } else { - let symbol = self.commit_node(Node::Underset { + let symbol = self.commit(Node::Underset { symbol: expl, target: op, }); Node::Underset { symbol, target } } } else { - let symbol = self.commit_node(Node::Operator(x, None)); + let symbol = self.commit(Node::Operator(x, None)); if is_over { Node::Overset { symbol, target } } else { @@ -264,9 +261,9 @@ impl<'source> Parser<'source> { Token::BigOp(op) => { let target = if matches!(self.peek.token(), Token::Limits) { self.next_token(); // Discard the limits token. - self.commit_node(Node::Operator(op, Some(OpAttr::NoMovableLimits))) + self.commit(Node::Operator(op, Some(OpAttr::NoMovableLimits))) } else { - self.commit_node(Node::Operator(op, None)) + self.commit(Node::Operator(op, None)) }; match self.get_bounds()? { Bounds(Some(under), Some(over)) => Node::UnderOver { @@ -283,7 +280,7 @@ impl<'source> Parser<'source> { } Token::Lim(lim) => { let lim_name = self.buffer.push_str(lim); - let lim = self.commit_node(Node::MultiLetterIdent(lim_name)); + let lim = self.commit(Node::MultiLetterIdent(lim_name)); if matches!(self.peek.token(), Token::Underscore) { self.next_token(); // Discard the underscore token. let under = self.parse_single_token()?; @@ -356,7 +353,7 @@ impl<'source> Parser<'source> { Token::Integral(int) => { if matches!(self.peek.token(), Token::Limits) { self.next_token(); // Discard the limits token. - let target = self.commit_node(Node::Operator(int, None)); + let target = self.commit(Node::Operator(int, None)); match self.get_bounds()? { Bounds(Some(under), Some(over)) => Node::UnderOver { target, @@ -370,7 +367,7 @@ impl<'source> Parser<'source> { } } } else { - let target = self.commit_node(Node::Operator(int, None)); + let target = self.commit(Node::Operator(int, None)); match self.get_bounds()? { Bounds(Some(sub), Some(sup)) => Node::SubSup { target, sub, sup }, Bounds(Some(symbol), None) => Node::Subscript { target, symbol }, @@ -385,12 +382,12 @@ impl<'source> Parser<'source> { Token::Operator(op @ (ops::EQUALS_SIGN | ops::IDENTICAL_TO)) => { let op = *op; self.next_token(); // Discard the operator token. - let first = self.commit_node(Node::OperatorWithSpacing { + let first = self.commit(Node::OperatorWithSpacing { op: ops::COLON, left: Some(MathSpacing::FourMu), right: Some(MathSpacing::Zero), }); - let second = self.commit_node(Node::OperatorWithSpacing { + let second = self.commit(Node::OperatorWithSpacing { op, left: Some(MathSpacing::Zero), right: None, @@ -500,7 +497,7 @@ impl<'source> Parser<'source> { let node = match env_name { "align" | "align*" | "aligned" => Node::Table(env_content, Align::Alternating), "cases" => { - let content = self.commit_node(Node::Table(env_content, Align::Left)); + let content = self.commit(Node::Table(env_content, Align::Left)); Node::Fenced { open: ops::LEFT_CURLY_BRACKET, close: ops::NULL, @@ -510,7 +507,7 @@ impl<'source> Parser<'source> { } "matrix" => Node::Table(env_content, Align::Center), matrix_variant @ ("pmatrix" | "bmatrix" | "vmatrix") => { - let content = self.commit_node(Node::Table(env_content, Align::Center)); + let content = self.commit(Node::Table(env_content, Align::Center)); let (open, close) = match matrix_variant { "pmatrix" => (ops::LEFT_PARENTHESIS, ops::RIGHT_PARENTHESIS), "bmatrix" => (ops::LEFT_SQUARE_BRACKET, ops::RIGHT_SQUARE_BRACKET), @@ -603,7 +600,7 @@ impl<'source> Parser<'source> { return Err(LatexError(loc, LatexErrKind::UnexpectedClose(cur_token))) } }; - Ok(self.commit_node(node)) + Ok(self.commit(node)) } #[inline] @@ -671,7 +668,7 @@ impl<'source> Parser<'source> { let mut primes = NodeListBuilder::new(); while matches!(self.peek.token(), Token::Prime) { self.next_token(); // Discard the prime token. - let node_ref = self.commit_node(Node::Operator(ops::PRIME, None)); + let node_ref = self.commit(Node::Operator(ops::PRIME, None)); primes.push(&mut self.arena, node_ref); } @@ -748,7 +745,7 @@ impl<'source> Parser<'source> { fn squeeze(&mut self, list_builder: NodeListBuilder, style: Option