-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimizer: avoid every rule must recursive children in optimizer #4618
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,13 @@ pub trait OptimizerRule { | |
|
||
/// A human readable name for this optimizer rule | ||
fn name(&self) -> &str; | ||
|
||
/// How should the rule be applied by the optimizer? See comments on [`ApplyOrder`] for details. | ||
/// | ||
/// If a rule use default None, its should traverse recursively plan inside itself | ||
fn apply_order(&self) -> Option<ApplyOrder> { | ||
None | ||
} | ||
Comment on lines
+66
to
+69
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It' compatible with origin |
||
} | ||
|
||
/// Options to control the DataFusion Optimizer. | ||
|
@@ -147,6 +154,44 @@ pub struct Optimizer { | |
pub rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>, | ||
} | ||
|
||
/// If a rule is with `ApplyOrder`, it means the optimizer will derive to handle children instead of | ||
/// recursively handling in rule. | ||
/// We just need handle a subtree pattern itself. | ||
/// | ||
/// Notice: **sometime** result after optimize still can be optimized, we need apply again. | ||
/// | ||
/// Usage Example: Merge Limit (subtree pattern is: Limit-Limit) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you |
||
/// ```rust | ||
/// use datafusion_expr::{Limit, LogicalPlan, LogicalPlanBuilder}; | ||
/// use datafusion_common::Result; | ||
/// fn merge_limit(parent: &Limit, child: &Limit) -> LogicalPlan { | ||
/// // just for run | ||
/// return parent.input.as_ref().clone(); | ||
/// } | ||
/// fn try_optimize(plan: &LogicalPlan) -> Result<Option<LogicalPlan>> { | ||
/// match plan { | ||
/// LogicalPlan::Limit(limit) => match limit.input.as_ref() { | ||
/// LogicalPlan::Limit(child_limit) => { | ||
/// // merge limit ... | ||
/// let optimized_plan = merge_limit(limit, child_limit); | ||
/// // due to optimized_plan may be optimized again, | ||
/// // for example: plan is Limit-Limit-Limit | ||
/// Ok(Some( | ||
/// try_optimize(&optimized_plan)? | ||
/// .unwrap_or_else(|| optimized_plan.clone()), | ||
/// )) | ||
/// } | ||
/// _ => Ok(None), | ||
/// }, | ||
/// _ => Ok(None), | ||
/// } | ||
/// } | ||
/// ``` | ||
pub enum ApplyOrder { | ||
jackwener marked this conversation as resolved.
Show resolved
Hide resolved
|
||
TopDown, | ||
BottomUp, | ||
} | ||
|
||
impl Optimizer { | ||
/// Create a new optimizer using the recommended list of rules | ||
pub fn new(config: &OptimizerConfig) -> Self { | ||
|
@@ -213,7 +258,7 @@ impl Optimizer { | |
log_plan(&format!("Optimizer input (pass {})", i), &new_plan); | ||
|
||
for rule in &self.rules { | ||
let result = rule.try_optimize(&new_plan, optimizer_config); | ||
let result = self.optimize_recursively(rule, &new_plan, optimizer_config); | ||
match result { | ||
Ok(Some(plan)) => { | ||
if !plan.schema().equivalent_names_and_types(new_plan.schema()) { | ||
|
@@ -274,6 +319,80 @@ impl Optimizer { | |
debug!("Optimizer took {} ms", start_time.elapsed().as_millis()); | ||
Ok(new_plan) | ||
} | ||
|
||
fn optimize_node( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What value does this function add? In other words, why not call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I forgot to add future TODO: for rule in rules:
rule.try_optimize(rule) |
||
&self, | ||
rule: &Arc<dyn OptimizerRule + Send + Sync>, | ||
plan: &LogicalPlan, | ||
optimizer_config: &mut OptimizerConfig, | ||
) -> Result<Option<LogicalPlan>> { | ||
// TODO: future feature: We can do Batch optimize | ||
rule.try_optimize(plan, optimizer_config) | ||
} | ||
|
||
fn optimize_inputs( | ||
&self, | ||
rule: &Arc<dyn OptimizerRule + Send + Sync>, | ||
plan: &LogicalPlan, | ||
optimizer_config: &mut OptimizerConfig, | ||
) -> Result<Option<LogicalPlan>> { | ||
let inputs = plan.inputs(); | ||
let result = inputs | ||
.iter() | ||
.map(|sub_plan| self.optimize_recursively(rule, sub_plan, optimizer_config)) | ||
.collect::<Result<Vec<_>>>()?; | ||
if result.is_empty() || result.iter().all(|o| o.is_none()) { | ||
return Ok(None); | ||
} | ||
|
||
let new_inputs = result | ||
.into_iter() | ||
.enumerate() | ||
.map(|(i, o)| match o { | ||
Some(plan) => plan, | ||
None => (*(inputs.get(i).unwrap())).clone(), | ||
}) | ||
.collect::<Vec<_>>(); | ||
|
||
Ok(Some(plan.with_new_inputs(new_inputs.as_slice())?)) | ||
} | ||
|
||
/// Use a rule to optimize the whole plan. | ||
/// If the rule with `ApplyOrder`, we don't need to recursively handle children in rule. | ||
pub fn optimize_recursively( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please add docstrings about what this API does? Also, I wonder if it needs to be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sometime we need use it in UT😂 |
||
&self, | ||
rule: &Arc<dyn OptimizerRule + Send + Sync>, | ||
plan: &LogicalPlan, | ||
optimizer_config: &mut OptimizerConfig, | ||
) -> Result<Option<LogicalPlan>> { | ||
match rule.apply_order() { | ||
Some(order) => match order { | ||
ApplyOrder::TopDown => { | ||
let optimize_self_opt = | ||
self.optimize_node(rule, plan, optimizer_config)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I took a shot at rewriting this stuff to use Result::and_then, etc and I think how you have written it here is clearer 👍 |
||
let optimize_inputs_opt = match &optimize_self_opt { | ||
Some(optimized_plan) => { | ||
self.optimize_inputs(rule, optimized_plan, optimizer_config)? | ||
} | ||
_ => self.optimize_inputs(rule, plan, optimizer_config)?, | ||
}; | ||
Ok(optimize_inputs_opt.or(optimize_self_opt)) | ||
} | ||
ApplyOrder::BottomUp => { | ||
let optimize_inputs_opt = | ||
self.optimize_inputs(rule, plan, optimizer_config)?; | ||
let optimize_self_opt = match &optimize_inputs_opt { | ||
Some(optimized_plan) => { | ||
self.optimize_node(rule, optimized_plan, optimizer_config)? | ||
} | ||
_ => self.optimize_node(rule, plan, optimizer_config)?, | ||
}; | ||
Ok(optimize_self_opt.or(optimize_inputs_opt)) | ||
} | ||
}, | ||
_ => rule.try_optimize(plan, optimizer_config), | ||
} | ||
} | ||
} | ||
|
||
/// Log the plan in debug/tracing mode after some part of the optimizer runs | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.