Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to datafusion 16 #634

Closed
wants to merge 13 commits into from
Prev Previous commit
Next Next commit
Update test fixtures
  • Loading branch information
Brent Gardner committed Jan 26, 2023
commit 3ac91524764d284f51d1e499858e13b11146bd68
147 changes: 68 additions & 79 deletions ballista/scheduler/src/state/execution_graph_dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,8 @@ mod tests {
let dot = ExecutionGraphDot::generate(Arc::new(graph))
.map_err(|e| BallistaError::Internal(format!("{:?}", e)))?;

let expected = r#"digraph G {
let expected = r#"
digraph G {
subgraph cluster0 {
label = "Stage 1 [Resolved]";
stage_1_0 [shape=box, label="ShuffleWriter [0 partitions]"]
Expand All @@ -449,15 +450,15 @@ mod tests {
subgraph cluster2 {
label = "Stage 3 [Unresolved]";
stage_3_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_3_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_3_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0 [shape=box, label="HashJoin
join_expr=a@0 = a@0
filter_expr="]
stage_3_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_3_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"]
stage_3_0_0_0_0_0 -> stage_3_0_0_0_0
stage_3_0_0_0_0 -> stage_3_0_0_0
stage_3_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_3_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"]
stage_3_0_0_0_1_0 -> stage_3_0_0_0_1
stage_3_0_0_0_1 -> stage_3_0_0_0
Expand All @@ -474,15 +475,15 @@ filter_expr="]
label = "Stage 5 [Unresolved]";
stage_5_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_5_0_0 [shape=box, label="Projection: a@0, b@1, a@2, b@3, a@4, b@5"]
stage_5_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_5_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_5_0_0_0_0 [shape=box, label="HashJoin
join_expr=b@3 = b@1
filter_expr="]
stage_5_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_5_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_5_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"]
stage_5_0_0_0_0_0_0 -> stage_5_0_0_0_0_0
stage_5_0_0_0_0_0 -> stage_5_0_0_0_0
stage_5_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_5_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_5_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=4]"]
stage_5_0_0_0_0_1_0 -> stage_5_0_0_0_0_1
stage_5_0_0_0_0_1 -> stage_5_0_0_0_0
Expand All @@ -495,8 +496,8 @@ filter_expr="]
stage_3_0 -> stage_5_0_0_0_0_0_0
stage_4_0 -> stage_5_0_0_0_0_1_0
}
"#;
assert_eq!(expected, &dot);
"#.trim();
assert_eq!(dot.trim(), expected);
Ok(())
}

Expand All @@ -506,25 +507,26 @@ filter_expr="]
let dot = ExecutionGraphDot::generate_for_query_stage(Arc::new(graph), 3)
.map_err(|e| BallistaError::Internal(format!("{:?}", e)))?;

let expected = r#"digraph G {
let expected = r#"
digraph G {
stage_3_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_3_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_3_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0 [shape=box, label="HashJoin
join_expr=a@0 = a@0
filter_expr="]
stage_3_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_3_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"]
stage_3_0_0_0_0_0 -> stage_3_0_0_0_0
stage_3_0_0_0_0 -> stage_3_0_0_0
stage_3_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_3_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"]
stage_3_0_0_0_1_0 -> stage_3_0_0_0_1
stage_3_0_0_0_1 -> stage_3_0_0_0
stage_3_0_0_0 -> stage_3_0_0
stage_3_0_0 -> stage_3_0
}
"#;
assert_eq!(expected, &dot);
"#.trim();
assert_eq!(dot.trim(), expected);
Ok(())
}

Expand All @@ -534,7 +536,8 @@ filter_expr="]
let dot = ExecutionGraphDot::generate(Arc::new(graph))
.map_err(|e| BallistaError::Internal(format!("{:?}", e)))?;

let expected = r#"digraph G {
let expected = r#"
digraph G {
subgraph cluster0 {
label = "Stage 1 [Resolved]";
stage_1_0 [shape=box, label="ShuffleWriter [0 partitions]"]
Expand All @@ -548,47 +551,56 @@ filter_expr="]
stage_2_0_0 -> stage_2_0
}
subgraph cluster2 {
label = "Stage 3 [Resolved]";
stage_3_0 [shape=box, label="ShuffleWriter [0 partitions]"]
stage_3_0_0 [shape=box, label="MemoryExec"]
label = "Stage 3 [Unresolved]";
stage_3_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_3_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0 [shape=box, label="HashJoin
join_expr=a@0 = a@0
filter_expr="]
stage_3_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"]
stage_3_0_0_0_0_0 -> stage_3_0_0_0_0
stage_3_0_0_0_0 -> stage_3_0_0_0
stage_3_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_3_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"]
stage_3_0_0_0_1_0 -> stage_3_0_0_0_1
stage_3_0_0_0_1 -> stage_3_0_0_0
stage_3_0_0_0 -> stage_3_0_0
stage_3_0_0 -> stage_3_0
}
subgraph cluster3 {
label = "Stage 4 [Unresolved]";
stage_4_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_4_0_0 [shape=box, label="Projection: a@0, a@1, a@2"]
stage_4_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0 [shape=box, label="HashJoin
label = "Stage 4 [Resolved]";
stage_4_0 [shape=box, label="ShuffleWriter [0 partitions]"]
stage_4_0_0 [shape=box, label="MemoryExec"]
stage_4_0_0 -> stage_4_0
}
subgraph cluster4 {
label = "Stage 5 [Unresolved]";
stage_5_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_5_0_0 [shape=box, label="Projection: a@0, a@1, a@2"]
stage_5_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_5_0_0_0_0 [shape=box, label="HashJoin
join_expr=a@1 = a@0
filter_expr="]
stage_4_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_0_0 [shape=box, label="HashJoin
join_expr=a@0 = a@0
filter_expr="]
stage_4_0_0_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"]
stage_4_0_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0_0
stage_4_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0
stage_4_0_0_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"]
stage_4_0_0_0_0_0_0_1_0 -> stage_4_0_0_0_0_0_0_1
stage_4_0_0_0_0_0_0_1 -> stage_4_0_0_0_0_0_0
stage_4_0_0_0_0_0_0 -> stage_4_0_0_0_0_0
stage_4_0_0_0_0_0 -> stage_4_0_0_0_0
stage_4_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"]
stage_4_0_0_0_0_1_0 -> stage_4_0_0_0_0_1
stage_4_0_0_0_0_1 -> stage_4_0_0_0_0
stage_4_0_0_0_0 -> stage_4_0_0_0
stage_4_0_0_0 -> stage_4_0_0
stage_4_0_0 -> stage_4_0
stage_5_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_5_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"]
stage_5_0_0_0_0_0_0 -> stage_5_0_0_0_0_0
stage_5_0_0_0_0_0 -> stage_5_0_0_0_0
stage_5_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=8192]"]
stage_5_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=4]"]
stage_5_0_0_0_0_1_0 -> stage_5_0_0_0_0_1
stage_5_0_0_0_0_1 -> stage_5_0_0_0_0
stage_5_0_0_0_0 -> stage_5_0_0_0
stage_5_0_0_0 -> stage_5_0_0
stage_5_0_0 -> stage_5_0
}
stage_1_0 -> stage_4_0_0_0_0_0_0_0_0
stage_2_0 -> stage_4_0_0_0_0_0_0_1_0
stage_3_0 -> stage_4_0_0_0_0_1_0
stage_1_0 -> stage_3_0_0_0_0_0
stage_2_0 -> stage_3_0_0_0_1_0
stage_3_0 -> stage_5_0_0_0_0_0_0
stage_4_0 -> stage_5_0_0_0_0_1_0
}
"#;
assert_eq!(expected, &dot);
"#.trim();
assert_eq!(dot.trim(), expected);
Ok(())
}

Expand All @@ -598,37 +610,14 @@ filter_expr="]
let dot = ExecutionGraphDot::generate_for_query_stage(Arc::new(graph), 4)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The plan has changed a lot due to optimizations DF ... I think looking at stage 3 or 5 here rather than 4 would be more interesting though (those have joins)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I switched it to 3

.map_err(|e| BallistaError::Internal(format!("{:?}", e)))?;

let expected = r#"digraph G {
stage_4_0 [shape=box, label="ShuffleWriter [48 partitions]"]
stage_4_0_0 [shape=box, label="Projection: a@0, a@1, a@2"]
stage_4_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0 [shape=box, label="HashJoin
join_expr=a@1 = a@0
filter_expr="]
stage_4_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_0_0 [shape=box, label="HashJoin
join_expr=a@0 = a@0
filter_expr="]
stage_4_0_0_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"]
stage_4_0_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0_0
stage_4_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0
stage_4_0_0_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"]
stage_4_0_0_0_0_0_0_1_0 -> stage_4_0_0_0_0_0_0_1
stage_4_0_0_0_0_0_0_1 -> stage_4_0_0_0_0_0_0
stage_4_0_0_0_0_0_0 -> stage_4_0_0_0_0_0
stage_4_0_0_0_0_0 -> stage_4_0_0_0_0
stage_4_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"]
stage_4_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"]
stage_4_0_0_0_0_1_0 -> stage_4_0_0_0_0_1
stage_4_0_0_0_0_1 -> stage_4_0_0_0_0
stage_4_0_0_0_0 -> stage_4_0_0_0
stage_4_0_0_0 -> stage_4_0_0
let expected = r#"
digraph G {
stage_4_0 [shape=box, label="ShuffleWriter [0 partitions]"]
stage_4_0_0 [shape=box, label="MemoryExec"]
stage_4_0_0 -> stage_4_0
}
"#;
assert_eq!(expected, &dot);
"#.trim();
assert_eq!(dot.trim(), expected);
Ok(())
}

Expand Down