From 3349d66dc81417d8bfe5387058c794baa89624cf Mon Sep 17 00:00:00 2001
From: ygf11
Date: Tue, 4 Oct 2022 21:23:07 -0400
Subject: [PATCH 1/3] Expose and document a simpler public API for simplify expressions

---
 .../optimizer/src/simplify_expressions.rs     | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
index 4f92a65e4505..0256a916277a 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -950,6 +950,24 @@ macro_rules! assert_contains {
     };
 }
 
+/// Apply simplification and constant propagation to an [`Expr`].
+///
+/// # Arguments
+///
+/// * `expr` - The logical expression
+/// * `schema` - The DataFusion schema for the expr, used to resolve `Column` references
+///   to qualified or unqualified fields by name.
+/// * `props` - The execution properties ([`ExecutionProps`]) that, together with `schema`,
+///   make up the [`SimplifyContext`] used to simplify `expr`.
+pub fn simplify_expr(
+    expr: Expr,
+    schema: DFSchemaRef,
+    props: &ExecutionProps,
+) -> Result<Expr> {
+    let info = SimplifyContext::new(vec![&schema], props);
+    expr.simplify(&info)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -2553,4 +2571,56 @@ mod tests {
 
         assert_optimized_plan_eq(&plan, expected);
     }
+
+    #[test]
+    fn simplify_expr_for_constant_fold_test() {
+        let schema = DFSchema::new_with_metadata(
+            vec![DFField::new(None, "x", DataType::Int32, false)],
+            HashMap::new(),
+        )
+        .unwrap();
+
+        // x + (1 + 3)
+        let expr = Expr::BinaryExpr {
+            left: Box::new(col("x")),
+            op: Operator::Plus,
+            right: Box::new(Expr::BinaryExpr {
+                left: Box::new(lit(1)),
+                op: Operator::Plus,
+                right: Box::new(lit(3)),
+            }),
+        };
+
+        let props = ExecutionProps::new();
+        let simplified_expr = simplify_expr(expr, Arc::new(schema), &props).unwrap();
+
+        // x + 4
+        let expected = Expr::BinaryExpr {
+            left: Box::new(col("x")),
+            op: Operator::Plus,
+            right: Box::new(lit(4)),
+        };
+        assert_eq!(simplified_expr, expected);
+    }
+
+    #[test]
+    fn simplify_expr_for_rewrite_test() {
+        let schema = DFSchema::new_with_metadata(
+            vec![DFField::new(None, "x", DataType::Int32, false)],
+            HashMap::new(),
+        )
+        .unwrap();
+
+        // x * 1
+        let expr = Expr::BinaryExpr {
+            left: Box::new(col("x")),
+            op: Operator::Multiply,
+            right: Box::new(lit(1)),
+        };
+
+        let props = ExecutionProps::new();
+        let simplified_expr = simplify_expr(expr, Arc::new(schema), &props).unwrap();
+
+        assert_eq!(simplified_expr, col("x"));
+    }
 }

From 0e27949157b5f8cff355ea2ed4e89ebe78f35197 Mon Sep 17 00:00:00 2001
From: ygf11
Date: Thu, 6 Oct 2022 01:42:34 -0400
Subject: [PATCH 2/3] Fix minor comment

---
 .../optimizer/src/simplify_expressions.rs     | 66 ++++++-------------
 1 file changed, 20 insertions(+), 46 deletions(-)

diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
index 0256a916277a..717afee2d0f1 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -2573,54 +2573,28 @@ mod tests {
     }
 
     #[test]
-    fn simplify_expr_for_constant_fold_test() {
-        let schema = DFSchema::new_with_metadata(
-            vec![DFField::new(None, "x", DataType::Int32, false)],
-            HashMap::new(),
-        )
-        .unwrap();
-
-        // x + (1 + 3)
-        let expr = Expr::BinaryExpr {
-            left: Box::new(col("x")),
-            op: Operator::Plus,
-            right: Box::new(Expr::BinaryExpr {
-                left: Box::new(lit(1)),
-                op: Operator::Plus,
-                right: Box::new(lit(3)),
-            }),
-        };
-
+    fn simplify_expr_api_test() {
+        let schema = Arc::new(
+            DFSchema::new_with_metadata(
+                vec![DFField::new(None, "x", DataType::Int32, false)],
+                HashMap::new(),
+            )
+            .unwrap(),
+        );
         let props = ExecutionProps::new();
-        let simplified_expr = simplify_expr(expr, Arc::new(schema), &props).unwrap();
-
-        // x + 4
-        let expected = Expr::BinaryExpr {
-            left: Box::new(col("x")),
-            op: Operator::Plus,
-            right: Box::new(lit(4)),
-        };
-        assert_eq!(simplified_expr, expected);
-    }
 
-    #[test]
-    fn simplify_expr_for_rewrite_test() {
-        let schema = DFSchema::new_with_metadata(
-            vec![DFField::new(None, "x", DataType::Int32, false)],
-            HashMap::new(),
-        )
-        .unwrap();
-
-        // x * 1
-        let expr = Expr::BinaryExpr {
-            left: Box::new(col("x")),
-            op: Operator::Multiply,
-            right: Box::new(lit(1)),
-        };
-
-        let props = ExecutionProps::new();
-        let simplified_expr = simplify_expr(expr, Arc::new(schema), &props).unwrap();
+        // x + (1 + 3) -> x + 4
+        {
+            let expr = col("x") + (lit(1) + lit(3));
+            let simplified_expr = simplify_expr(expr, schema.clone(), &props).unwrap();
+            assert_eq!(simplified_expr, col("x") + lit(4));
+        }
 
-        assert_eq!(simplified_expr, col("x"));
+        // x * 1 -> x
+        {
+            let expr = col("x") * lit(1);
+            let simplified_expr = simplify_expr(expr, schema, &props).unwrap();
+            assert_eq!(simplified_expr, col("x"));
+        }
     }
 }

From 851dc3a18e7bc21d6aef3fe4c81d32234f173bb9 Mon Sep 17 00:00:00 2001
From: ygf11
Date: Thu, 6 Oct 2022 19:15:11 -0400
Subject: [PATCH 3/3] Fixed signature of schema

---
 .../optimizer/src/simplify_expressions.rs     | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
index 717afee2d0f1..74bc7682d48b 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -961,10 +961,10 @@ macro_rules! assert_contains {
 ///   make up the [`SimplifyContext`] used to simplify `expr`.
 pub fn simplify_expr(
     expr: Expr,
-    schema: DFSchemaRef,
+    schema: &DFSchemaRef,
     props: &ExecutionProps,
 ) -> Result<Expr> {
-    let info = SimplifyContext::new(vec![&schema], props);
+    let info = SimplifyContext::new(vec![schema], props);
     expr.simplify(&info)
 }
 
@@ -973,7 +973,7 @@ mod tests {
     use super::*;
     use arrow::array::{ArrayRef, Int32Array};
     use chrono::{DateTime, TimeZone, Utc};
-    use datafusion_common::DFField;
+    use datafusion_common::{DFField, ToDFSchema};
     use datafusion_expr::logical_plan::table_scan;
     use datafusion_expr::{
         and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano,
@@ -2574,26 +2574,22 @@ mod tests {
 
     #[test]
     fn simplify_expr_api_test() {
-        let schema = Arc::new(
-            DFSchema::new_with_metadata(
-                vec![DFField::new(None, "x", DataType::Int32, false)],
-                HashMap::new(),
-            )
-            .unwrap(),
-        );
+        let schema = Schema::new(vec![Field::new("x", DataType::Int32, false)])
+            .to_dfschema_ref()
+            .unwrap();
         let props = ExecutionProps::new();
 
         // x + (1 + 3) -> x + 4
         {
             let expr = col("x") + (lit(1) + lit(3));
-            let simplified_expr = simplify_expr(expr, schema.clone(), &props).unwrap();
+            let simplified_expr = simplify_expr(expr, &schema, &props).unwrap();
             assert_eq!(simplified_expr, col("x") + lit(4));
         }
 
         // x * 1 -> x
         {
             let expr = col("x") * lit(1);
-            let simplified_expr = simplify_expr(expr, schema, &props).unwrap();
+            let simplified_expr = simplify_expr(expr, &schema, &props).unwrap();
             assert_eq!(simplified_expr, col("x"));
         }
     }