Skip to content

Commit

Permalink
fix precedence parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Geal committed Dec 8, 2024
1 parent 6c12469 commit f1abba6
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 80 deletions.
38 changes: 16 additions & 22 deletions src/precedence/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! Combinators to parse expressions with operator precedence.
#![cfg(feature="alloc")]
#![cfg(feature = "alloc")]
#![cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]

#[cfg(test)]
Expand Down Expand Up @@ -79,17 +79,11 @@ pub fn unary_op<I, O, E, P, Q>(
mut parser: P,
) -> impl FnMut(I) -> IResult<I, Unary<O, Q>, E>
where
P: Parser<I, O, E>,
P: Parser<I, Output = O, Error = E>,
Q: Ord + Copy,
{
move |input| match parser.parse(input) {
Ok((i, value)) => Ok((
i,
Unary {
value,
precedence,
},
)),
Ok((i, value)) => Ok((i, Unary { value, precedence })),
Err(e) => Err(e),
}
}
Expand All @@ -107,7 +101,7 @@ pub fn binary_op<I, O, E, P, Q>(
mut parser: P,
) -> impl FnMut(I) -> IResult<I, Binary<O, Q>, E>
where
P: Parser<I, O, E>,
P: Parser<I, Output = O, Error = E>,
Q: Ord + Copy,
{
move |input| match parser.parse(input) {
Expand All @@ -124,7 +118,7 @@ where
}

/// Parses an expression with operator precedence.
///
///
/// Supports prefix, postfix and binary operators. Operators are applied in ascending precedence.
///
/// The parser will track its current position inside the expression and call the respective
Expand All @@ -146,7 +140,7 @@ where
/// * `binary` Parser for binary operators.
/// * `operand` Parser for operands.
/// * `fold` Function that evaluates a single operation and returns the result.
///
///
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, IResult};
Expand All @@ -156,11 +150,11 @@ where
/// use nom::sequence::delimited;
/// use nom::bytes::complete::tag;
/// use nom::branch::alt;
///
///
/// fn parser(i: &str) -> IResult<&str, i64> {
/// precedence(
/// unary_op(1, tag("-")),
/// fail,
/// fail(),
/// alt((
/// binary_op(2, Assoc::Left, tag("*")),
/// binary_op(2, Assoc::Left, tag("/")),
Expand Down Expand Up @@ -189,19 +183,19 @@ where
/// assert_eq!(parser("4-(2+2)"), Ok(("", 0)));
/// assert_eq!(parser("3-(2*3)+7+2*2-(2*(2+4))"), Ok(("", -4)));
/// ```
///
///
/// # Evaluation order
/// This parser reads expressions from left to right and folds operations as soon as possible. This
/// behaviour is only important when using an operator grammar that allows for ambiguous expressions.
///
///
/// For example, the expression `-a++**b` is ambiguous with the following precedence.
///
///
/// | Operator | Position | Precedence | Associativity |
/// |----------|----------|------------|---------------|
/// | ** | Binary | 1 | Right |
/// | - | Prefix | 2 | N/A |
/// | ++ | Postfix | 3 | N/A |
///
///
/// The expression can be parsed in two ways: `-((a++)**b)` or `((-a)++)**b`. This parser will always
/// parse it as the latter because of how it evaluates expressions:
/// * It reads, left-to-right, the first two operators `-a++`.
Expand All @@ -220,11 +214,11 @@ pub fn precedence<I, O, E, E2, F, G, H1, H3, H2, P1, P2, P3, Q>(
where
I: Clone + PartialEq,
E: ParseError<I> + FromExternalError<I, E2>,
F: Parser<I, O, E>,
F: Parser<I, Output = O, Error = E>,
G: FnMut(Operation<P1, P2, P3, O>) -> Result<O, E2>,
H1: Parser<I, Unary<P1, Q>, E>,
H2: Parser<I, Unary<P2, Q>, E>,
H3: Parser<I, Binary<P3, Q>, E>,
H1: Parser<I, Output = Unary<P1, Q>, Error = E>,
H2: Parser<I, Output = Unary<P2, Q>, Error = E>,
H3: Parser<I, Output = Binary<P3, Q>, Error = E>,
Q: Ord + Copy,
{
move |mut i| {
Expand Down
14 changes: 7 additions & 7 deletions src/precedence/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ use crate::{
branch::alt,
bytes::complete::tag,
character::complete::digit1,
combinator::{map_res, fail},
combinator::{fail, map_res},
error::ErrorKind,
internal::{Err, IResult},
sequence::delimited,
error::ErrorKind,
};

#[cfg(feature = "alloc")]
Expand All @@ -16,7 +16,7 @@ use crate::precedence::precedence;
fn parser(i: &str) -> IResult<&str, i64> {
precedence(
unary_op(1, tag("-")),
fail,
fail(),
alt((
binary_op(2, Assoc::Left, tag("*")),
binary_op(2, Assoc::Left, tag("/")),
Expand Down Expand Up @@ -50,9 +50,9 @@ fn precedence_test() {
assert_eq!(parser("4-2*2"), Ok(("", 0)));
assert_eq!(parser("(4-2)*2"), Ok(("", 4)));
assert_eq!(parser("2*2/1"), Ok(("", 4)));

let a = "a";

assert_eq!(
parser(a),
Err(Err::Error(error_node_position!(
Expand All @@ -61,9 +61,9 @@ fn precedence_test() {
error_position!(&a[..], ErrorKind::Tag)
)))
);

let b = "3+b";

assert_eq!(
parser(b),
Err(Err::Error(error_node_position!(
Expand Down
113 changes: 62 additions & 51 deletions tests/expression_ast.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1 as digit, alphanumeric1 as alphanumeric},
combinator::{map_res, map},
character::complete::{alphanumeric1 as alphanumeric, digit1 as digit},
combinator::{map, map_res},
multi::separated_list0,
precedence::{binary_op, precedence, unary_op, Assoc, Operation},
sequence::delimited,
IResult,
precedence::{precedence, Assoc, binary_op, unary_op, Operation},
IResult, Parser,
};

// Elements of the abstract syntax tree (ast) that represents an expression.
Expand All @@ -29,25 +29,25 @@ pub enum Expr {

// Prefix operators.
enum PrefixOp {
Identity, // +
Negate, // -
Identity, // +
Negate, // -
}

// Postfix operators.
enum PostfixOp {
// The function call operator. In addition to its own representation "()" it carries additional information that we need to keep here.
// Specifically the vector of expressions that make up the parameters.
Call(Vec<Expr>), // ()
Call(Vec<Expr>), // ()
}

// Binary operators.
enum BinaryOp {
Addition, // +
Subtraction, // -
Multiplication, // *
Division, // /
Addition, // +
Subtraction, // -
Multiplication, // *
Division, // /
// The ternary operator can contain a single expression.
Ternary(Expr), // ?:
Ternary(Expr), // ?:
}

// Parser for function calls.
Expand All @@ -57,31 +57,28 @@ fn function_call(i: &str) -> IResult<&str, PostfixOp> {
tag("("),
// Subexpressions are evaluated by recursing back into the expression parser.
separated_list0(tag(","), expression),
tag(")")
tag(")"),
),
|v: Vec<Expr>| PostfixOp::Call(v)
)(i)
|v: Vec<Expr>| PostfixOp::Call(v),
)
.parse(i)
}

// The ternary operator is actually just a binary operator that contains another expression. So it can be
// handled similarly to the function call operator except it's in a binary position and can only contain
// a single expression.
//
//
// For example the expression "a<b ? a : b" is handled similarly to the function call operator, the
// "?" is treated like an opening bracket and the ":" is treated like a closing bracket.
//
// For the outer expression the result looks like "a<b ?: b". Where "?:" is a single operator. The
// subexpression is contained within the operator in the same way that the function call operator
// contains subexpressions.
fn ternary_operator(i: &str) -> IResult<&str, BinaryOp> {
map(
delimited(
tag("?"),
expression,
tag(":")
),
|e: Expr| BinaryOp::Ternary(e)
)(i)
map(delimited(tag("?"), expression, tag(":")), |e: Expr| {
BinaryOp::Ternary(e)
})
.parse(i)
}

// The actual expression parser.
Expand All @@ -94,65 +91,79 @@ fn expression(i: &str) -> IResult<&str, Expr> {
// Function calls are implemented as postfix unary operators.
unary_op(1, function_call),
alt((
binary_op(3, Assoc::Left, alt((
map(tag("*"), |_| BinaryOp::Multiplication),
map(tag("/"), |_| BinaryOp::Division),
))),
binary_op(4, Assoc::Left, alt((
map(tag("+"), |_| BinaryOp::Addition),
map(tag("-"), |_| BinaryOp::Subtraction),
))),
binary_op(
3,
Assoc::Left,
alt((
map(tag("*"), |_| BinaryOp::Multiplication),
map(tag("/"), |_| BinaryOp::Division),
)),
),
binary_op(
4,
Assoc::Left,
alt((
map(tag("+"), |_| BinaryOp::Addition),
map(tag("-"), |_| BinaryOp::Subtraction),
)),
),
// Ternary operators are just binary operators with a subexpression.
binary_op(5, Assoc::Right, ternary_operator),
)),
alt((
map_res(digit,
|s: &str| match s.parse::<i64>() {
Ok(s) => Ok(Expr::Num(s)),
Err(e) => Err(e),
}
),
map_res(digit, |s: &str| match s.parse::<i64>() {
Ok(s) => Ok(Expr::Num(s)),
Err(e) => Err(e),
}),
map(alphanumeric, |s: &str| Expr::Iden(s.to_string())),
delimited(tag("("), expression, tag(")")),
)),
|op: Operation<PrefixOp, PostfixOp, BinaryOp, Expr>| -> Result<Expr, ()> {
use nom::precedence::Operation::*;
use PrefixOp::*;
use PostfixOp::*;
use BinaryOp::*;
use PostfixOp::*;
use PrefixOp::*;
match op {
// The identity operator (prefix +) is ignored.
Prefix(Identity, e) => Ok(e),

// Unary minus gets evaluated to the same representation as a multiplication with -1.
Prefix(Negate, e) => Ok(Expr::Mul(Expr::Num(-1).into(), e.into())),

// The list of parameters is taken from the operator and placed into the ast.
Postfix(e, Call(p)) => Ok(Expr::Call(e.into(), p)),

// Meaning is assigned to the expressions of the ternary operator during evaluation.
// The lhs becomes the condition, the contained expression is the true case, rhs the false case.
Binary(lhs, Ternary(e), rhs) => Ok(Expr::Tern(lhs.into(), e.into(), rhs.into())),

// Raw operators get turned into their respective ast nodes.
Binary(lhs, Multiplication, rhs) => Ok(Expr::Mul(lhs.into(), rhs.into())),
Binary(lhs, Division, rhs) => Ok(Expr::Div(lhs.into(), rhs.into())),
Binary(lhs, Addition, rhs) => Ok(Expr::Add(lhs.into(), rhs.into())),
Binary(lhs, Subtraction, rhs) => Ok(Expr::Sub(lhs.into(), rhs.into())),
}
}
},
)(i)
}

#[test]
fn expression_test() {
assert_eq!(
expression("-2*max(2,3)-2").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("Sub(Mul(Mul(Num(-1), Num(2)), Call(Iden(\"max\"), [Num(2), Num(3)])), Num(2))")))
Ok((
"",
String::from("Sub(Mul(Mul(Num(-1), Num(2)), Call(Iden(\"max\"), [Num(2), Num(3)])), Num(2))")
))
);

assert_eq!(
expression("a?2+c:-2*2").map(|(i, x)| (i, format!("{:?}", x))),
Ok((
"",
String::from(
"Tern(Iden(\"a\"), Add(Num(2), Iden(\"c\")), Mul(Mul(Num(-1), Num(2)), Num(2)))"
)
))
);

assert_eq!(
expression("a?2+c:-2*2").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("Tern(Iden(\"a\"), Add(Num(2), Iden(\"c\")), Mul(Mul(Num(-1), Num(2)), Num(2)))")))
);
}

0 comments on commit f1abba6

Please sign in to comment.