Skip to content

Commit

Permalink
Make sure nodes are constructed directly on the heap (#179)
Browse files Browse the repository at this point in the history
  • Loading branch information
tmke8 authored Aug 12, 2024
2 parents c0b7eac + b3f89fd commit db1744e
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 55 deletions.
24 changes: 18 additions & 6 deletions latex2mmlc/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@ pub enum Node<'source> {
},
MultiLetterIdent(StrReference),
Space(&'static str),
Subscript(NodeReference, NodeReference),
Superscript(NodeReference, NodeReference),
Subscript {
target: NodeReference,
symbol: NodeReference,
},
Superscript {
target: NodeReference,
symbol: NodeReference,
},
SubSup {
target: NodeReference,
sub: NodeReference,
Expand Down Expand Up @@ -158,8 +164,14 @@ impl<'source> Node<'source> {
}
Node::Space(space) => push!(s, "<mspace width=\"", space, "em\"/>"),
// The following nodes have exactly two children.
node @ (Node::Subscript(first, second)
| Node::Superscript(first, second)
node @ (Node::Subscript {
symbol: second,
target: first,
}
| Node::Superscript {
symbol: second,
target: first,
}
| Node::Overset {
symbol: second,
target: first,
Expand All @@ -170,8 +182,8 @@ impl<'source> Node<'source> {
}
| Node::Root(second, first)) => {
let (open, close) = match node {
Node::Subscript(_, _) => ("<msub>", "</msub>"),
Node::Superscript(_, _) => ("<msup>", "</msup>"),
Node::Subscript { .. } => ("<msub>", "</msub>"),
Node::Superscript { .. } => ("<msup>", "</msup>"),
Node::Overset { .. } => ("<mover>", "</mover>"),
Node::Underset { .. } => ("<munder>", "</munder>"),
Node::Root(_, _) => ("<mroot>", "</mroot>"),
Expand Down
1 change: 1 addition & 0 deletions latex2mmlc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ mod tests {
("black_board_font", r"\mathbb{R}"),
("sum_with_special_symbol", r"\sum_{i = 0}^∞ i"),
("sum_with_limit", r"\sum\limits_{i=1}^N"),
("sum_pointless_limits", r"\sum\limits n"),
("product", r"\prod_n n"),
("underscore", r"x\ y"),
("stretchy_brace", r"\left\{ x ( x + 2 ) \right\}"),
Expand Down
111 changes: 62 additions & 49 deletions latex2mmlc/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,29 +68,32 @@ impl<'source> Parser<'source> {
&mut self,
cur_tokloc: TokLoc<'source>,
) -> Result<NodeReference, LatexError<'source>> {
let left = self.parse_single_node(cur_tokloc)?;
let target = self.parse_single_node(cur_tokloc)?;

match self.get_bounds()? {
Bounds(Some(sub), Some(sup)) => Ok(self.commit_node(Node::SubSup {
target: left,
sub,
sup,
})),
Bounds(Some(symbol), None) => Ok(self.commit_node(Node::Subscript(left, symbol))),
Bounds(None, Some(symbol)) => Ok(self.commit_node(Node::Superscript(left, symbol))),
Bounds(None, None) => Ok(left),
Bounds(Some(sub), Some(sup)) => Ok(self.commit(Node::SubSup { target, sub, sup })),
Bounds(Some(symbol), None) => Ok(self.commit(Node::Subscript { target, symbol })),
Bounds(None, Some(symbol)) => Ok(self.commit(Node::Superscript { target, symbol })),
Bounds(None, None) => Ok(target),
}
}

fn commit_node(&mut self, node: Node<'source>) -> NodeReference {
/// Put the node onto the heap in the arena and return a reference to it.
///
/// The advantage over using `Box` is that we can store the nodes in a contiguous
/// memory block, and release all of them at once when the arena is dropped.
///
/// Ideally, the node is constructed directly on the heap, so try to avoid
/// constructing it on the stack and then moving it to the heap.
fn commit(&mut self, node: Node<'source>) -> NodeReference {
self.arena.push(node)
}

// Read the node immediately after without worrying about whether
// the infix operator `_`, `^`, `'` will continue
//
// Note: Use `parse_node()` when reading nodes correctly in
// consideration of infix operators.
/// Read the node immediately after without worrying about whether
/// the infix operator `_`, `^`, `'` will continue
///
/// Use this function only if you are sure that this is what you need.
/// Otherwise, use `parse_node` instead.
fn parse_single_node(
&mut self,
cur_tokloc: TokLoc<'source>,
Expand All @@ -106,8 +109,8 @@ impl<'source> Parser<'source> {
Some(tf) => Node::MultiLetterIdent(self.buffer.transform_and_push(number, tf)),
None => Node::Number(number),
};
let first = self.commit_node(num);
let second = self.commit_node(match tok {
let first = self.commit(num);
let second = self.commit(match tok {
Token::NumberWithDot(_) => Node::SingleLetterIdent('.', None),
Token::NumberWithComma(_) => Node::Operator(ops::COMMA, None),
_ => unreachable!(),
Expand Down Expand Up @@ -146,11 +149,15 @@ impl<'source> Parser<'source> {
let numerator = self.parse_token()?;
let denominator = self.parse_token()?;
if matches!(cur_token, Token::Binom(_)) {
let inner = Node::Frac(numerator, denominator, Some('0'), displaystyle);
Node::Fenced {
open: ops::LEFT_PARENTHESIS,
close: ops::RIGHT_PARENTHESIS,
content: self.commit_node(inner),
content: self.commit(Node::Frac(
numerator,
denominator,
Some('0'),
displaystyle,
)),
style: None,
}
} else {
Expand Down Expand Up @@ -195,8 +202,7 @@ impl<'source> Parser<'source> {
};
let numerator = self.parse_token()?;
let denominator = self.parse_token()?;
let inner = Node::Frac(numerator, denominator, line_thickness, None);
let content = self.commit_node(inner);
let content = self.commit(Node::Frac(numerator, denominator, line_thickness, None));
Node::Fenced {
open,
close,
Expand Down Expand Up @@ -229,22 +235,22 @@ impl<'source> Parser<'source> {
{
self.next_token(); // Discard the circumflex or underscore token.
let expl = self.parse_single_token()?;
let op = self.commit_node(Node::Operator(x, None));
let op = self.commit(Node::Operator(x, None));
if is_over {
let symbol = self.commit_node(Node::Overset {
let symbol = self.commit(Node::Overset {
symbol: expl,
target: op,
});
Node::Overset { symbol, target }
} else {
let symbol = self.commit_node(Node::Underset {
let symbol = self.commit(Node::Underset {
symbol: expl,
target: op,
});
Node::Underset { symbol, target }
}
} else {
let symbol = self.commit_node(Node::Operator(x, None));
let symbol = self.commit(Node::Operator(x, None));
if is_over {
Node::Overset { symbol, target }
} else {
Expand All @@ -255,11 +261,10 @@ impl<'source> Parser<'source> {
Token::BigOp(op) => {
let target = if matches!(self.peek.token(), Token::Limits) {
self.next_token(); // Discard the limits token.
Node::Operator(op, Some(OpAttr::NoMovableLimits))
self.commit(Node::Operator(op, Some(OpAttr::NoMovableLimits)))
} else {
Node::Operator(op, None)
self.commit(Node::Operator(op, None))
};
let target = self.commit_node(target);
match self.get_bounds()? {
Bounds(Some(under), Some(over)) => Node::UnderOver {
target,
Expand All @@ -268,26 +273,30 @@ impl<'source> Parser<'source> {
},
Bounds(Some(symbol), None) => Node::Underset { target, symbol },
Bounds(None, Some(symbol)) => Node::Overset { target, symbol },
Bounds(None, None) => Node::Operator(op, None),
Bounds(None, None) => {
return Ok(target);
}
}
}
Token::Lim(lim) => {
let lim = Node::MultiLetterIdent(self.buffer.push_str(lim));
let lim_name = self.buffer.push_str(lim);
let lim = self.commit(Node::MultiLetterIdent(lim_name));
if matches!(self.peek.token(), Token::Underscore) {
self.next_token(); // Discard the underscore token.
let under = self.parse_single_token()?;
Node::Underset {
target: self.commit_node(lim),
target: lim,
symbol: under,
}
} else {
lim
return Ok(lim);
}
}
Token::Slashed => {
self.next_token(); // Optimistically skip the next token.
// TODO: Actually check the braces.
self.next_token(); // Optimistically assume that the next token is `{`
let node = self.parse_token()?;
self.next_token(); // Optimistically skip the next token.
self.next_token(); // Optimistically assume that the next token is `}`
Node::Slashed(node)
}
Token::Not => {
Expand Down Expand Up @@ -344,7 +353,7 @@ impl<'source> Parser<'source> {
Token::Integral(int) => {
if matches!(self.peek.token(), Token::Limits) {
self.next_token(); // Discard the limits token.
let target = self.commit_node(Node::Operator(int, None));
let target = self.commit(Node::Operator(int, None));
match self.get_bounds()? {
Bounds(Some(under), Some(over)) => Node::UnderOver {
target,
Expand All @@ -353,28 +362,32 @@ impl<'source> Parser<'source> {
},
Bounds(Some(symbol), None) => Node::Underset { target, symbol },
Bounds(None, Some(symbol)) => Node::Overset { target, symbol },
Bounds(None, None) => Node::Operator(int, None),
Bounds(None, None) => {
return Ok(target);
}
}
} else {
let target = self.commit_node(Node::Operator(int, None));
let target = self.commit(Node::Operator(int, None));
match self.get_bounds()? {
Bounds(Some(sub), Some(sup)) => Node::SubSup { target, sub, sup },
Bounds(Some(symbol), None) => Node::Subscript(target, symbol),
Bounds(None, Some(symbol)) => Node::Superscript(target, symbol),
Bounds(None, None) => Node::Operator(int, None),
Bounds(Some(symbol), None) => Node::Subscript { target, symbol },
Bounds(None, Some(symbol)) => Node::Superscript { target, symbol },
Bounds(None, None) => {
return Ok(target);
}
}
}
}
Token::Colon => match &self.peek.token() {
Token::Operator(op @ (ops::EQUALS_SIGN | ops::IDENTICAL_TO)) => {
let op = *op;
self.next_token(); // Discard the operator token.
let first = self.commit_node(Node::OperatorWithSpacing {
let first = self.commit(Node::OperatorWithSpacing {
op: ops::COLON,
left: Some(MathSpacing::FourMu),
right: Some(MathSpacing::Zero),
});
let second = self.commit_node(Node::OperatorWithSpacing {
let second = self.commit(Node::OperatorWithSpacing {
op,
left: Some(MathSpacing::Zero),
right: None,
Expand Down Expand Up @@ -484,17 +497,17 @@ impl<'source> Parser<'source> {
let node = match env_name {
"align" | "align*" | "aligned" => Node::Table(env_content, Align::Alternating),
"cases" => {
let content = Node::Table(env_content, Align::Left);
let content = self.commit(Node::Table(env_content, Align::Left));
Node::Fenced {
open: ops::LEFT_CURLY_BRACKET,
close: ops::NULL,
content: self.commit_node(content),
content,
style: None,
}
}
"matrix" => Node::Table(env_content, Align::Center),
matrix_variant @ ("pmatrix" | "bmatrix" | "vmatrix") => {
let content = Node::Table(env_content, Align::Center);
let content = self.commit(Node::Table(env_content, Align::Center));
let (open, close) = match matrix_variant {
"pmatrix" => (ops::LEFT_PARENTHESIS, ops::RIGHT_PARENTHESIS),
"bmatrix" => (ops::LEFT_SQUARE_BRACKET, ops::RIGHT_SQUARE_BRACKET),
Expand All @@ -505,7 +518,7 @@ impl<'source> Parser<'source> {
Node::Fenced {
open,
close,
content: self.commit_node(content),
content,
style: None,
}
}
Expand Down Expand Up @@ -587,7 +600,7 @@ impl<'source> Parser<'source> {
return Err(LatexError(loc, LatexErrKind::UnexpectedClose(cur_token)))
}
};
Ok(self.commit_node(node))
Ok(self.commit(node))
}

#[inline]
Expand Down Expand Up @@ -655,7 +668,7 @@ impl<'source> Parser<'source> {
let mut primes = NodeListBuilder::new();
while matches!(self.peek.token(), Token::Prime) {
self.next_token(); // Discard the prime token.
let node_ref = self.commit_node(Node::Operator(ops::PRIME, None));
let node_ref = self.commit(Node::Operator(ops::PRIME, None));
primes.push(&mut self.arena, node_ref);
}

Expand Down Expand Up @@ -732,7 +745,7 @@ impl<'source> Parser<'source> {
fn squeeze(&mut self, list_builder: NodeListBuilder, style: Option<Style>) -> NodeReference {
match list_builder.as_singleton_or_finish() {
SingletonOrList::Singleton(value) => value,
SingletonOrList::List(list) => self.commit_node(Node::Row(list, style)),
SingletonOrList::List(list) => self.commit(Node::Row(list, style)),
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
source: latex2mmlc/src/lib.rs
expression: "\\sum\\limits n"
---
<math>
<mo movablelimits="false">∑</mo>
<mi>n</mi>
</math>

0 comments on commit db1744e

Please sign in to comment.