Skip to content

Commit ad70a1e

Browse files
authored
reuse datafusion physical planner in ballista building from protobuf (#532)
* use logical planner in ballista building
* simplify statement
* fix unit test
* fix per comment
1 parent 63e3045 commit ad70a1e

File tree

3 files changed

+153
-149
lines changed

3 files changed

+153
-149
lines changed

ballista/rust/core/src/serde/physical_plan/from_proto.rs

+26-116
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use datafusion::execution::context::{
3636
ExecutionConfig, ExecutionContextState, ExecutionProps,
3737
};
3838
use datafusion::logical_plan::{DFSchema, Expr};
39-
use datafusion::physical_plan::aggregates::{create_aggregate_expr, AggregateFunction};
39+
use datafusion::physical_plan::aggregates::AggregateFunction;
4040
use datafusion::physical_plan::expressions::col;
4141
use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec};
4242
use datafusion::physical_plan::hash_join::PartitionMode;
@@ -45,7 +45,6 @@ use datafusion::physical_plan::planner::DefaultPhysicalPlanner;
4545
use datafusion::physical_plan::window_functions::{
4646
BuiltInWindowFunction, WindowFunction,
4747
};
48-
use datafusion::physical_plan::windows::create_window_expr;
4948
use datafusion::physical_plan::windows::WindowAggExec;
5049
use datafusion::physical_plan::{
5150
coalesce_batches::CoalesceBatchesExec,
@@ -205,76 +204,27 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
205204
)
206205
})?
207206
.clone();
208-
209207
let physical_schema: SchemaRef =
210208
SchemaRef::new((&input_schema).try_into()?);
211-
212-
let catalog_list =
213-
Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
214-
let ctx_state = ExecutionContextState {
215-
catalog_list,
216-
scalar_functions: Default::default(),
217-
var_provider: Default::default(),
218-
aggregate_functions: Default::default(),
219-
config: ExecutionConfig::new(),
220-
execution_props: ExecutionProps::new(),
221-
};
222-
209+
let ctx_state = ExecutionContextState::new();
223210
let window_agg_expr: Vec<(Expr, String)> = window_agg
224211
.window_expr
225212
.iter()
226213
.zip(window_agg.window_expr_name.iter())
227214
.map(|(expr, name)| expr.try_into().map(|expr| (expr, name.clone())))
228215
.collect::<Result<Vec<_>, _>>()?;
229-
230-
let mut physical_window_expr = vec![];
231-
232216
let df_planner = DefaultPhysicalPlanner::default();
233-
234-
for (expr, name) in &window_agg_expr {
235-
match expr {
236-
Expr::WindowFunction {
237-
fun,
238-
args,
239-
partition_by,
240-
order_by,
241-
window_frame,
242-
..
243-
} => {
244-
let arg = df_planner
245-
.create_physical_expr(
246-
&args[0],
247-
&physical_schema,
248-
&ctx_state,
249-
)
250-
.map_err(|e| {
251-
BallistaError::General(format!("{:?}", e))
252-
})?;
253-
if !partition_by.is_empty() {
254-
return Err(BallistaError::NotImplemented("Window function with partition by is not yet implemented".to_owned()));
255-
}
256-
if !order_by.is_empty() {
257-
return Err(BallistaError::NotImplemented("Window function with order by is not yet implemented".to_owned()));
258-
}
259-
if window_frame.is_some() {
260-
return Err(BallistaError::NotImplemented("Window function with window frame is not yet implemented".to_owned()));
261-
}
262-
let window_expr = create_window_expr(
263-
&fun,
264-
&[arg],
265-
&physical_schema,
266-
name.to_owned(),
267-
)?;
268-
physical_window_expr.push(window_expr);
269-
}
270-
_ => {
271-
return Err(BallistaError::General(
272-
"Invalid expression for WindowAggrExec".to_string(),
273-
));
274-
}
275-
}
276-
}
277-
217+
let physical_window_expr = window_agg_expr
218+
.iter()
219+
.map(|(expr, name)| {
220+
df_planner.create_window_expr_with_name(
221+
expr,
222+
name.to_string(),
223+
&physical_schema,
224+
&ctx_state,
225+
)
226+
})
227+
.collect::<Result<Vec<_>, _>>()?;
278228
Ok(Arc::new(WindowAggExec::try_new(
279229
physical_window_expr,
280230
input,
@@ -297,7 +247,6 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
297247
AggregateMode::FinalPartitioned
298248
}
299249
};
300-
301250
let group = hash_agg
302251
.group_expr
303252
.iter()
@@ -306,25 +255,13 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
306255
compile_expr(expr, &input.schema()).map(|e| (e, name.to_string()))
307256
})
308257
.collect::<Result<Vec<_>, _>>()?;
309-
310258
let logical_agg_expr: Vec<(Expr, String)> = hash_agg
311259
.aggr_expr
312260
.iter()
313261
.zip(hash_agg.aggr_expr_name.iter())
314262
.map(|(expr, name)| expr.try_into().map(|expr| (expr, name.clone())))
315263
.collect::<Result<Vec<_>, _>>()?;
316-
317-
let catalog_list =
318-
Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
319-
let ctx_state = ExecutionContextState {
320-
catalog_list,
321-
scalar_functions: Default::default(),
322-
var_provider: Default::default(),
323-
aggregate_functions: Default::default(),
324-
config: ExecutionConfig::new(),
325-
execution_props: ExecutionProps::new(),
326-
};
327-
264+
let ctx_state = ExecutionContextState::new();
328265
let input_schema = hash_agg
329266
.input_schema
330267
.as_ref()
@@ -336,37 +273,18 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
336273
.clone();
337274
let physical_schema: SchemaRef =
338275
SchemaRef::new((&input_schema).try_into()?);
339-
340-
let mut physical_aggr_expr = vec![];
341-
342276
let df_planner = DefaultPhysicalPlanner::default();
343-
for (expr, name) in &logical_agg_expr {
344-
match expr {
345-
Expr::AggregateFunction { fun, args, .. } => {
346-
let arg = df_planner
347-
.create_physical_expr(
348-
&args[0],
349-
&physical_schema,
350-
&ctx_state,
351-
)
352-
.map_err(|e| {
353-
BallistaError::General(format!("{:?}", e))
354-
})?;
355-
physical_aggr_expr.push(create_aggregate_expr(
356-
&fun,
357-
false,
358-
&[arg],
359-
&physical_schema,
360-
name.to_string(),
361-
)?);
362-
}
363-
_ => {
364-
return Err(BallistaError::General(
365-
"Invalid expression for HashAggregateExec".to_string(),
366-
))
367-
}
368-
}
369-
}
277+
let physical_aggr_expr = logical_agg_expr
278+
.iter()
279+
.map(|(expr, name)| {
280+
df_planner.create_aggregate_expr_with_name(
281+
expr,
282+
name.to_string(),
283+
&physical_schema,
284+
&ctx_state,
285+
)
286+
})
287+
.collect::<Result<Vec<_>, _>>()?;
370288
Ok(Arc::new(HashAggregateExec::try_new(
371289
agg_mode,
372290
group,
@@ -484,15 +402,7 @@ fn compile_expr(
484402
schema: &Schema,
485403
) -> Result<Arc<dyn PhysicalExpr>, BallistaError> {
486404
let df_planner = DefaultPhysicalPlanner::default();
487-
let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
488-
let state = ExecutionContextState {
489-
catalog_list,
490-
scalar_functions: HashMap::new(),
491-
var_provider: HashMap::new(),
492-
aggregate_functions: HashMap::new(),
493-
config: ExecutionConfig::new(),
494-
execution_props: ExecutionProps::new(),
495-
};
405+
let state = ExecutionContextState::new();
496406
let expr: Expr = expr.try_into()?;
497407
df_planner
498408
.create_physical_expr(&expr, schema, &state)

datafusion/src/physical_plan/planner.rs

+95-21
Original file line numberDiff line numberDiff line change
@@ -731,34 +731,82 @@ impl DefaultPhysicalPlanner {
731731
}
732732
}
733733

734-
/// Create a window expression from a logical expression
735-
pub fn create_window_expr(
734+
/// Create a window expression with a name from a logical expression
735+
pub fn create_window_expr_with_name(
736736
&self,
737737
e: &Expr,
738-
logical_input_schema: &DFSchema,
738+
name: String,
739739
physical_input_schema: &Schema,
740740
ctx_state: &ExecutionContextState,
741741
) -> Result<Arc<dyn WindowExpr>> {
742-
// unpack aliased logical expressions, e.g. "sum(col) over () as total"
743-
let (name, e) = match e {
744-
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
745-
_ => (e.name(logical_input_schema)?, e),
746-
};
747-
748742
match e {
749-
Expr::WindowFunction { fun, args, .. } => {
743+
Expr::WindowFunction {
744+
fun,
745+
args,
746+
partition_by,
747+
order_by,
748+
window_frame,
749+
} => {
750750
let args = args
751751
.iter()
752752
.map(|e| {
753753
self.create_physical_expr(e, physical_input_schema, ctx_state)
754754
})
755755
.collect::<Result<Vec<_>>>()?;
756-
// if !order_by.is_empty() {
757-
// return Err(DataFusionError::NotImplemented(
758-
// "Window function with order by is not yet implemented".to_owned(),
759-
// ));
760-
// }
761-
windows::create_window_expr(fun, &args, physical_input_schema, name)
756+
let partition_by = partition_by
757+
.iter()
758+
.map(|e| {
759+
self.create_physical_expr(e, physical_input_schema, ctx_state)
760+
})
761+
.collect::<Result<Vec<_>>>()?;
762+
let order_by = order_by
763+
.iter()
764+
.map(|e| match e {
765+
Expr::Sort {
766+
expr,
767+
asc,
768+
nulls_first,
769+
} => self.create_physical_sort_expr(
770+
expr,
771+
&physical_input_schema,
772+
SortOptions {
773+
descending: !*asc,
774+
nulls_first: *nulls_first,
775+
},
776+
&ctx_state,
777+
),
778+
_ => Err(DataFusionError::Plan(
779+
"Sort only accepts sort expressions".to_string(),
780+
)),
781+
})
782+
.collect::<Result<Vec<_>>>()?;
783+
if !partition_by.is_empty() {
784+
return Err(DataFusionError::NotImplemented(
785+
"window expression with non-empty partition by clause is not yet supported"
786+
.to_owned(),
787+
));
788+
}
789+
if !order_by.is_empty() {
790+
return Err(DataFusionError::NotImplemented(
791+
"window expression with non-empty order by clause is not yet supported"
792+
.to_owned(),
793+
));
794+
}
795+
if window_frame.is_some() {
796+
return Err(DataFusionError::NotImplemented(
797+
"window expression with window frame definition is not yet supported"
798+
.to_owned(),
799+
));
800+
}
801+
windows::create_window_expr(
802+
fun,
803+
name,
804+
&args,
805+
&partition_by,
806+
&order_by,
807+
*window_frame,
808+
physical_input_schema,
809+
)
762810
}
763811
other => Err(DataFusionError::Internal(format!(
764812
"Invalid window expression '{:?}'",
@@ -767,20 +815,30 @@ impl DefaultPhysicalPlanner {
767815
}
768816
}
769817

770-
/// Create an aggregate expression from a logical expression
771-
pub fn create_aggregate_expr(
818+
/// Create a window expression from a logical expression or an alias
819+
pub fn create_window_expr(
772820
&self,
773821
e: &Expr,
774822
logical_input_schema: &DFSchema,
775823
physical_input_schema: &Schema,
776824
ctx_state: &ExecutionContextState,
777-
) -> Result<Arc<dyn AggregateExpr>> {
778-
// unpack aliased logical expressions, e.g. "sum(col) as total"
825+
) -> Result<Arc<dyn WindowExpr>> {
826+
// unpack aliased logical expressions, e.g. "sum(col) over () as total"
779827
let (name, e) = match e {
780828
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
781829
_ => (e.name(logical_input_schema)?, e),
782830
};
831+
self.create_window_expr_with_name(e, name, physical_input_schema, ctx_state)
832+
}
783833

834+
/// Create an aggregate expression with a name from a logical expression
835+
pub fn create_aggregate_expr_with_name(
836+
&self,
837+
e: &Expr,
838+
name: String,
839+
physical_input_schema: &Schema,
840+
ctx_state: &ExecutionContextState,
841+
) -> Result<Arc<dyn AggregateExpr>> {
784842
match e {
785843
Expr::AggregateFunction {
786844
fun,
@@ -819,7 +877,23 @@ impl DefaultPhysicalPlanner {
819877
}
820878
}
821879

822-
/// Create an aggregate expression from a logical expression
880+
/// Create an aggregate expression from a logical expression or an alias
881+
pub fn create_aggregate_expr(
882+
&self,
883+
e: &Expr,
884+
logical_input_schema: &DFSchema,
885+
physical_input_schema: &Schema,
886+
ctx_state: &ExecutionContextState,
887+
) -> Result<Arc<dyn AggregateExpr>> {
888+
// unpack aliased logical expressions, e.g. "sum(col) as total"
889+
let (name, e) = match e {
890+
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
891+
_ => (e.name(logical_input_schema)?, e),
892+
};
893+
self.create_aggregate_expr_with_name(e, name, physical_input_schema, ctx_state)
894+
}
895+
896+
/// Create a physical sort expression from a logical expression
823897
pub fn create_physical_sort_expr(
824898
&self,
825899
e: &Expr,

0 commit comments

Comments
 (0)