Skip to content

Commit

Permalink
Allow OptimizerRule to easily optimize subqueries
Browse files Browse the repository at this point in the history
  • Loading branch information
eejbyfeldt committed Oct 11, 2024
1 parent a8d3fae commit cdd1236
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 33 deletions.
8 changes: 6 additions & 2 deletions datafusion/expr/src/expr_rewriter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,12 @@ impl NamePreserver {
pub fn new(plan: &LogicalPlan) -> Self {
Self {
// The schema of Filter and Join nodes comes from their inputs rather than their output expressions,
// so there is no need to use aliases to preserve expression names.
use_alias: !matches!(plan, LogicalPlan::Filter(_) | LogicalPlan::Join(_)),
// so there is no need to use aliases to preserve expression names. For TableScan nodes
// the expressions are only for filters and also not part of the output schema.
use_alias: !matches!(
plan,
LogicalPlan::Filter(_) | LogicalPlan::Join(_) | LogicalPlan::TableScan(_)
),
}
}

Expand Down
60 changes: 37 additions & 23 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ pub trait OptimizerRule: Debug {
None
}

/// Should the rule be applied to subqueries.
fn optimize_subqueries(&self) -> bool {
false
}

/// Does this rule support rewriting owned plans (rather than by reference)?
fn supports_rewrite(&self) -> bool {
true
Expand Down Expand Up @@ -244,17 +249,13 @@ impl Optimizer {
pub fn new() -> Self {
let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
Arc::new(EliminateNestedUnion::new()),
Arc::new(SimplifyExpressions::new()),
Arc::new(UnwrapCastInComparison::new()),
Arc::new(SimplifyExpressions::new()),
Arc::new(ReplaceDistinctWithAggregate::new()),
Arc::new(EliminateJoin::new()),
Arc::new(DecorrelatePredicateSubquery::new()),
Arc::new(ScalarSubqueryToJoin::new()),
Arc::new(ExtractEquijoinPredicate::new()),
// simplify expressions does not simplify expressions in subqueries, so we
// run it again after running the optimizations that potentially converted
// subqueries to joins
Arc::new(SimplifyExpressions::new()),
Arc::new(RewriteDisjunctivePredicate::new()),
Arc::new(EliminateDuplicatedExpr::new()),
Arc::new(EliminateFilter::new()),
Expand All @@ -272,8 +273,8 @@ impl Optimizer {
Arc::new(SingleDistinctToGroupBy::new()),
// The previous optimizations added expressions and projections,
// that might benefit from the following rules
Arc::new(SimplifyExpressions::new()),
Arc::new(UnwrapCastInComparison::new()),
Arc::new(SimplifyExpressions::new()),
Arc::new(CommonSubexprEliminate::new()),
Arc::new(EliminateGroupByConstant::new()),
Arc::new(OptimizeProjections::new()),
Expand Down Expand Up @@ -382,23 +383,36 @@ impl Optimizer {
.skip_failed_rules
.then(|| new_plan.clone());

let starting_schema = Arc::clone(new_plan.schema());

let result = match rule.apply_order() {
// optimizer handles recursion
Some(apply_order) => new_plan.rewrite(&mut Rewriter::new(
apply_order,
rule.as_ref(),
config,
)),
// rule handles recursion itself
None => optimize_plan_node(new_plan, rule.as_ref(), config),
}
// verify the rule didn't change the schema
.and_then(|tnr| {
assert_schema_is_the_same(rule.name(), &starting_schema, &tnr.data)?;
Ok(tnr)
});
let apply_rule = |plan: LogicalPlan| {
let starting_schema = Arc::clone(plan.schema());
match rule.apply_order() {
// optimizer handles recursion
Some(apply_order) => plan.rewrite(&mut Rewriter::new(
apply_order,
rule.as_ref(),
config,
)),
// rule handles recursion itself
None => optimize_plan_node(plan, rule.as_ref(), config),
}
// verify the rule didn't change the schema
.and_then(|tnr| {
assert_schema_is_the_same(
rule.name(),
&starting_schema,
&tnr.data,
)?;
Ok(tnr)
})
};

let result = if rule.optimize_subqueries() {
apply_rule(new_plan).and_then(|transformed| {
transformed.transform_data(|plan| plan.map_subqueries(apply_rule))
})
} else {
apply_rule(new_plan)
};

// Handle results
match (result, prev_plan) {
Expand Down
16 changes: 15 additions & 1 deletion datafusion/optimizer/src/simplify_expressions/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub fn simplify_regex_expr(
) -> Result<Expr> {
let mode = OperatorMode::new(&op);

if let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = right.as_ref() {
if let Some(pattern) = extract_string_literal(right.as_ref()) {
match regex_syntax::Parser::new().parse(pattern) {
Ok(hir) => {
let kind = hir.kind();
Expand Down Expand Up @@ -70,6 +70,20 @@ pub fn simplify_regex_expr(
Ok(Expr::BinaryExpr(BinaryExpr { left, op, right }))
}

fn extract_string_literal(expr: &Expr) -> Option<&String> {
fn extract_from_scalar(value: &ScalarValue) -> Option<&String> {
match value {
ScalarValue::Utf8(Some(s)) => Some(s),
ScalarValue::Dictionary(_, value) => extract_from_scalar(value),
_ => None,
}
}
match expr {
Expr::Literal(value) => extract_from_scalar(value),
_ => None,
}
}

#[derive(Debug)]
struct OperatorMode {
/// Negative match.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ impl OptimizerRule for SimplifyExpressions {
Some(ApplyOrder::BottomUp)
}

fn optimize_subqueries(&self) -> bool {
true
}

fn supports_rewrite(&self) -> bool {
true
}
Expand Down
4 changes: 4 additions & 0 deletions datafusion/optimizer/src/unwrap_cast_in_comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ impl OptimizerRule for UnwrapCastInComparison {
true
}

fn optimize_subqueries(&self) -> bool {
true
}

fn rewrite(
&self,
plan: LogicalPlan,
Expand Down
10 changes: 4 additions & 6 deletions datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,13 @@ logical_plan after type_coercion SAME TEXT AS ABOVE
logical_plan after count_wildcard_rule SAME TEXT AS ABOVE
analyzed_logical_plan SAME TEXT AS ABOVE
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after rewrite_disjunctive_predicate SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
Expand All @@ -201,20 +200,19 @@ logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after push_down_filter SAME TEXT AS ABOVE
logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
logical_plan after optimize_projections TableScan: simple_explain_test projection=[a, b, c]
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after rewrite_disjunctive_predicate SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
Expand All @@ -228,8 +226,8 @@ logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after push_down_filter SAME TEXT AS ABOVE
logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
logical_plan after optimize_projections SAME TEXT AS ABOVE
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: t1.t1_int
04)------Filter: t1.t1_id > t1.t1_int
04)------Filter: t1.t1_int < t1.t1_id
05)--------TableScan: t1
06)--TableScan: t1 projection=[t1_id, t1_name, t1_int]

Expand Down

0 comments on commit cdd1236

Please sign in to comment.