From cef119da9ee8672b1b1e50ac01387dcb1640d96e Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Fri, 24 Feb 2023 17:10:44 +0100 Subject: [PATCH] fix: misc phys. expression display bugs (#5387) * fix: `TRY_CAST` phys. expr display * refactor: move precedence from `BinaryExpr` to `Operator` This is useful in other contexts as well, e.g. for formatting phys. expressions. * fix: display of nested phys. binary expr should use parenthesis Found while working on #4695. --- datafusion/expr/src/expr.rs | 33 +------ datafusion/expr/src/operator.rs | 29 ++++++ .../physical-expr/src/expressions/binary.rs | 92 ++++++++++++++++++- .../physical-expr/src/expressions/try_cast.rs | 14 +-- 4 files changed, 129 insertions(+), 39 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index a78808b26c29..8b6c39043b6c 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -238,35 +238,6 @@ impl BinaryExpr { pub fn new(left: Box, op: Operator, right: Box) -> Self { Self { left, op, right } } - - /// Get the operator precedence - /// use as a reference - pub fn precedence(&self) -> u8 { - match self.op { - Operator::Or => 5, - Operator::And => 10, - Operator::NotEq - | Operator::Eq - | Operator::Lt - | Operator::LtEq - | Operator::Gt - | Operator::GtEq => 20, - Operator::Plus | Operator::Minus => 30, - Operator::Multiply | Operator::Divide | Operator::Modulo => 40, - Operator::IsDistinctFrom - | Operator::IsNotDistinctFrom - | Operator::RegexMatch - | Operator::RegexNotMatch - | Operator::RegexIMatch - | Operator::RegexNotIMatch - | Operator::BitwiseAnd - | Operator::BitwiseOr - | Operator::BitwiseShiftLeft - | Operator::BitwiseShiftRight - | Operator::BitwiseXor - | Operator::StringConcat => 0, - } - } } impl Display for BinaryExpr { @@ -283,7 +254,7 @@ impl Display for BinaryExpr { ) -> fmt::Result { match expr { Expr::BinaryExpr(child) => { - let p = child.precedence(); + let p = child.op.precedence(); if p == 0 || p < precedence { write!(f, "({child})")?; } else { @@ -295,7 +266,7 @@ impl Display for BinaryExpr { Ok(()) } - let precedence = self.precedence(); + let precedence = self.op.precedence(); write_child(f, self.left.as_ref(), precedence)?; write!(f, " {} ", self.op)?; write_child(f, self.right.as_ref(), precedence) diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs index fac81654c4a8..ca6fb75276ff 100644 --- a/datafusion/expr/src/operator.rs +++ b/datafusion/expr/src/operator.rs @@ -142,6 +142,35 @@ impl Operator { | Operator::StringConcat => None, } } + + /// Get the operator precedence + /// use as a reference + pub fn precedence(&self) -> u8 { + match self { + Operator::Or => 5, + Operator::And => 10, + Operator::NotEq + | Operator::Eq + | Operator::Lt + | Operator::LtEq + | Operator::Gt + | Operator::GtEq => 20, + Operator::Plus | Operator::Minus => 30, + Operator::Multiply | Operator::Divide | Operator::Modulo => 40, + Operator::IsDistinctFrom + | Operator::IsNotDistinctFrom + | Operator::RegexMatch + | Operator::RegexNotMatch + | Operator::RegexIMatch + | Operator::RegexNotIMatch + | Operator::BitwiseAnd + | Operator::BitwiseOr + | Operator::BitwiseShiftLeft + | Operator::BitwiseShiftRight + | Operator::BitwiseXor + | Operator::StringConcat => 0, + } + } } impl fmt::Display for Operator { diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index ec9096ffc02a..0024deb3d4d8 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -118,7 +118,34 @@ impl BinaryExpr { impl std::fmt::Display for BinaryExpr { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{} {} {}", self.left, self.op, self.right) + // Put parentheses around child binary expressions so that we can see the difference + // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed, + // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are + // equivalent and the parentheses are not necessary. + + fn write_child( + f: &mut std::fmt::Formatter, + expr: &dyn PhysicalExpr, + precedence: u8, + ) -> std::fmt::Result { + if let Some(child) = expr.as_any().downcast_ref::() { + let p = child.op.precedence(); + if p == 0 || p < precedence { + write!(f, "({child})")?; + } else { + write!(f, "{child}")?; + } + } else { + write!(f, "{expr}")?; + } + + Ok(()) + } + + let precedence = self.op.precedence(); + write_child(f, self.left.as_ref(), precedence)?; + write!(f, " {} ", self.op)?; + write_child(f, self.right.as_ref(), precedence) } } @@ -4249,4 +4276,67 @@ mod tests { Ok(()) } + + #[test] + fn test_display_and_or_combo() { + let expr = BinaryExpr::new( + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(1)), + Operator::And, + lit(ScalarValue::from(2)), + )), + Operator::And, + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(3)), + Operator::And, + lit(ScalarValue::from(4)), + )), + ); + assert_eq!(expr.to_string(), "1 AND 2 AND 3 AND 4"); + + let expr = BinaryExpr::new( + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(1)), + Operator::Or, + lit(ScalarValue::from(2)), + )), + Operator::Or, + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(3)), + Operator::Or, + lit(ScalarValue::from(4)), + )), + ); + assert_eq!(expr.to_string(), "1 OR 2 OR 3 OR 4"); + + let expr = BinaryExpr::new( + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(1)), + Operator::And, + lit(ScalarValue::from(2)), + )), + Operator::Or, + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(3)), + Operator::And, + lit(ScalarValue::from(4)), + )), + ); + assert_eq!(expr.to_string(), "1 AND 2 OR 3 AND 4"); + + let expr = BinaryExpr::new( + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(1)), + Operator::Or, + lit(ScalarValue::from(2)), + )), + Operator::And, + Arc::new(BinaryExpr::new( + lit(ScalarValue::from(3)), + Operator::Or, + lit(ScalarValue::from(4)), + )), + ); + assert_eq!(expr.to_string(), "(1 OR 2) AND (3 OR 4)"); + } } diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs index ce5f1c2794b5..bceed0a34c48 100644 --- a/datafusion/physical-expr/src/expressions/try_cast.rs +++ b/datafusion/physical-expr/src/expressions/try_cast.rs @@ -58,7 +58,7 @@ impl TryCastExpr { impl fmt::Display for TryCastExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "CAST({} AS {:?})", self.expr, self.cast_type) + write!(f, "TRY_CAST({} AS {:?})", self.expr, self.cast_type) } } @@ -132,7 +132,7 @@ pub fn try_cast( Ok(Arc::new(TryCastExpr::new(expr, cast_type))) } else { Err(DataFusionError::NotImplemented(format!( - "Unsupported CAST from {expr_type:?} to {cast_type:?}" + "Unsupported TRY_CAST from {expr_type:?} to {cast_type:?}" ))) } } @@ -155,7 +155,7 @@ mod tests { // runs an end-to-end test of physical type cast // 1. construct a record batch with a column "a" of type A - // 2. construct a physical expression of CAST(a AS B) + // 2. construct a physical expression of TRY_CAST(a AS B) // 3. evaluate the expression // 4. verify that the resulting expression is of type B // 5. verify that the resulting values are downcastable and correct @@ -171,7 +171,7 @@ mod tests { // verify that its display is correct assert_eq!( - format!("CAST(a@0 AS {:?})", $TYPE), + format!("TRY_CAST(a@0 AS {:?})", $TYPE), format!("{}", expression) ); @@ -202,7 +202,7 @@ mod tests { // runs an end-to-end test of physical type cast // 1. construct a record batch with a column "a" of type A - // 2. construct a physical expression of CAST(a AS B) + // 2. construct a physical expression of TRY_CAST(a AS B) // 3. evaluate the expression // 4. verify that the resulting expression is of type B // 5. verify that the resulting values are downcastable and correct @@ -218,7 +218,7 @@ mod tests { // verify that its display is correct assert_eq!( - format!("CAST(a@0 AS {:?})", $TYPE), + format!("TRY_CAST(a@0 AS {:?})", $TYPE), format!("{}", expression) ); @@ -542,7 +542,7 @@ mod tests { let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let result = try_cast(col("a", &schema).unwrap(), &schema, DataType::LargeBinary); - result.expect_err("expected Invalid CAST"); + result.expect_err("expected Invalid TRY_CAST"); } // create decimal array with the specified precision and scale