Skip to content

Commit 9cf32cf

Browse files
authored
Implement readable explain plans for physical plans (#337)
* Implement readable explain plans for physical plans * Add apache copyright to display.rs * Set concurrency explicitly in test and make it windows friendly * fix doc example test * fmt!
1 parent b096539 commit 9cf32cf

30 files changed

+683
-48
lines changed

datafusion/src/logical_plan/display.rs

+3-10
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ pub struct IndentVisitor<'a, 'b> {
2929
f: &'a mut fmt::Formatter<'b>,
3030
/// If true, includes summarized schema information
3131
with_schema: bool,
32-
indent: u32,
32+
/// The current indent
33+
indent: usize,
3334
}
3435

3536
impl<'a, 'b> IndentVisitor<'a, 'b> {
@@ -42,13 +43,6 @@ impl<'a, 'b> IndentVisitor<'a, 'b> {
4243
indent: 0,
4344
}
4445
}
45-
46-
fn write_indent(&mut self) -> fmt::Result {
47-
for _ in 0..self.indent {
48-
write!(self.f, " ")?;
49-
}
50-
Ok(())
51-
}
5246
}
5347

5448
impl<'a, 'b> PlanVisitor for IndentVisitor<'a, 'b> {
@@ -58,8 +52,7 @@ impl<'a, 'b> PlanVisitor for IndentVisitor<'a, 'b> {
5852
if self.indent > 0 {
5953
writeln!(self.f)?;
6054
}
61-
self.write_indent()?;
62-
55+
write!(self.f, "{:indent$}", "", indent = self.indent * 2)?;
6356
write!(self.f, "{}", plan.display())?;
6457
if self.with_schema {
6558
write!(

datafusion/src/logical_plan/plan.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -356,13 +356,15 @@ pub enum Partitioning {
356356
/// after all children have been visited.
357357
///
358358
/// To use, define a struct that implements this trait and then invoke
359-
/// "LogicalPlan::accept".
359+
/// [`LogicalPlan::accept`].
360360
///
361361
/// For example, for a logical plan like:
362362
///
363+
/// ```text
363364
/// Projection: #id
364365
/// Filter: #state Eq Utf8("CO")
365366
/// CsvScan: employee.csv projection=Some([0, 3])
367+
/// ```
366368
///
367369
/// The sequence of visit operations would be:
368370
/// ```text

datafusion/src/physical_plan/coalesce_batches.rs

+18-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ use std::task::{Context, Poll};
2525

2626
use crate::error::{DataFusionError, Result};
2727
use crate::physical_plan::{
28-
ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream,
28+
DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream,
29+
SendableRecordBatchStream,
2930
};
3031

3132
use arrow::compute::kernels::concat::concat;
@@ -114,6 +115,22 @@ impl ExecutionPlan for CoalesceBatchesExec {
114115
is_closed: false,
115116
}))
116117
}
118+
119+
fn fmt_as(
120+
&self,
121+
t: DisplayFormatType,
122+
f: &mut std::fmt::Formatter,
123+
) -> std::fmt::Result {
124+
match t {
125+
DisplayFormatType::Default => {
126+
write!(
127+
f,
128+
"CoalesceBatchesExec: target_batch_size={}",
129+
self.target_batch_size
130+
)
131+
}
132+
}
133+
}
117134
}
118135

119136
struct CoalesceBatchesStream {

datafusion/src/physical_plan/cross_join.rs

+16-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
use futures::{lock::Mutex, StreamExt};
2222
use std::{any::Any, sync::Arc, task::Poll};
2323

24-
use crate::physical_plan::memory::MemoryStream;
2524
use arrow::datatypes::{Schema, SchemaRef};
2625
use arrow::error::Result as ArrowResult;
2726
use arrow::record_batch::RecordBatch;
@@ -36,8 +35,10 @@ use crate::{
3635
use async_trait::async_trait;
3736
use std::time::Instant;
3837

39-
use super::{ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream};
40-
use crate::physical_plan::coalesce_batches::concat_batches;
38+
use super::{
39+
coalesce_batches::concat_batches, memory::MemoryStream, DisplayFormatType,
40+
ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream,
41+
};
4142
use log::debug;
4243

4344
/// Data of the left side
@@ -192,6 +193,18 @@ impl ExecutionPlan for CrossJoinExec {
192193
join_time: 0,
193194
}))
194195
}
196+
197+
fn fmt_as(
198+
&self,
199+
t: DisplayFormatType,
200+
f: &mut std::fmt::Formatter,
201+
) -> std::fmt::Result {
202+
match t {
203+
DisplayFormatType::Default => {
204+
write!(f, "CrossJoinExec")
205+
}
206+
}
207+
}
195208
}
196209

197210
/// A stream that issues [RecordBatch]es as they arrive from the right of the join.

datafusion/src/physical_plan/csv.rs

+30-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
//! Execution plan for reading CSV files
1919
2020
use crate::error::{DataFusionError, Result};
21-
use crate::physical_plan::ExecutionPlan;
22-
use crate::physical_plan::{common, Partitioning};
21+
use crate::physical_plan::{common, DisplayFormatType, ExecutionPlan, Partitioning};
2322
use arrow::csv;
2423
use arrow::datatypes::{Schema, SchemaRef};
2524
use arrow::error::Result as ArrowResult;
@@ -135,6 +134,19 @@ impl std::fmt::Debug for Source {
135134
}
136135
}
137136

137+
impl std::fmt::Display for Source {
138+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139+
match self {
140+
Source::PartitionedFiles { path, filenames } => {
141+
write!(f, "Path({}: [{}])", path, filenames.join(","))
142+
}
143+
Source::Reader(_) => {
144+
write!(f, "Reader(...)")
145+
}
146+
}
147+
}
148+
}
149+
138150
impl Clone for Source {
139151
fn clone(&self) -> Self {
140152
match self {
@@ -405,6 +417,22 @@ impl ExecutionPlan for CsvExec {
405417
}
406418
}
407419
}
420+
421+
fn fmt_as(
422+
&self,
423+
t: DisplayFormatType,
424+
f: &mut std::fmt::Formatter,
425+
) -> std::fmt::Result {
426+
match t {
427+
DisplayFormatType::Default => {
428+
write!(
429+
f,
430+
"CsvExec: source={}, has_header={}",
431+
self.source, self.has_header
432+
)
433+
}
434+
}
435+
}
408436
}
409437

410438
/// Iterator over batches
+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Implementation of physical plan display. See
19+
//! [`crate::physical_plan::displayable`] for examples of how to
20+
//! format
21+
22+
use std::fmt;
23+
24+
use super::{accept, ExecutionPlan, ExecutionPlanVisitor};
25+
26+
/// Options for controlling how each [`ExecutionPlan`] should format itself
27+
#[derive(Debug, Clone, Copy)]
28+
pub enum DisplayFormatType {
29+
/// Default, compact format. Example: `FilterExec: c12 < 10.0`
30+
Default,
31+
}
32+
33+
/// Wraps an `ExecutionPlan` with various ways to display this plan
34+
pub struct DisplayableExecutionPlan<'a> {
35+
inner: &'a dyn ExecutionPlan,
36+
}
37+
38+
impl<'a> DisplayableExecutionPlan<'a> {
39+
/// Create a wrapper around an [`ExecutionPlan`] which can be
40+
/// pretty printed in a variety of ways
41+
pub fn new(inner: &'a dyn ExecutionPlan) -> Self {
42+
Self { inner }
43+
}
44+
45+
/// Return a `format`able structure that produces a single line
46+
/// per node.
47+
///
48+
/// ```text
49+
/// ProjectionExec: expr=[a]
50+
/// CoalesceBatchesExec: target_batch_size=4096
51+
/// FilterExec: a < 5
52+
/// RepartitionExec: partitioning=RoundRobinBatch(16)
53+
/// CsvExec: source=...
54+
/// ```
55+
pub fn indent(&self) -> impl fmt::Display + 'a {
56+
struct Wrapper<'a>(&'a dyn ExecutionPlan);
57+
impl<'a> fmt::Display for Wrapper<'a> {
58+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
59+
let t = DisplayFormatType::Default;
60+
let mut visitor = IndentVisitor { t, f, indent: 0 };
61+
accept(self.0, &mut visitor)
62+
}
63+
}
64+
Wrapper(self.inner)
65+
}
66+
}
67+
68+
/// Formats plans with a single line per node.
69+
struct IndentVisitor<'a, 'b> {
70+
/// How to format each node
71+
t: DisplayFormatType,
72+
/// Write to this formatter
73+
f: &'a mut fmt::Formatter<'b>,
74+
/// Current indent depth; each level is rendered as two spaces
75+
indent: usize,
76+
}
77+
78+
impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> {
79+
type Error = fmt::Error;
80+
fn pre_visit(
81+
&mut self,
82+
plan: &dyn ExecutionPlan,
83+
) -> std::result::Result<bool, Self::Error> {
84+
write!(self.f, "{:indent$}", "", indent = self.indent * 2)?;
85+
plan.fmt_as(self.t, self.f)?;
86+
writeln!(self.f)?;
87+
self.indent += 1;
88+
Ok(true)
89+
}
90+
}

datafusion/src/physical_plan/distinct_expressions.rs

+4
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ impl AggregateExpr for DistinctCount {
120120
count_data_type: self.data_type.clone(),
121121
}))
122122
}
123+
124+
fn name(&self) -> &str {
125+
&self.name
126+
}
123127
}
124128

125129
#[derive(Debug)]

datafusion/src/physical_plan/empty.rs

+15-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ use std::any::Any;
2121
use std::sync::Arc;
2222

2323
use crate::error::{DataFusionError, Result};
24-
use crate::physical_plan::memory::MemoryStream;
25-
use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning};
24+
use crate::physical_plan::{
25+
memory::MemoryStream, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
26+
};
2627
use arrow::array::NullArray;
2728
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
2829
use arrow::record_batch::RecordBatch;
@@ -120,6 +121,18 @@ impl ExecutionPlan for EmptyExec {
120121
None,
121122
)?))
122123
}
124+
125+
fn fmt_as(
126+
&self,
127+
t: DisplayFormatType,
128+
f: &mut std::fmt::Formatter,
129+
) -> std::fmt::Result {
130+
match t {
131+
DisplayFormatType::Default => {
132+
write!(f, "EmptyExec: produce_one_row={}", self.produce_one_row)
133+
}
134+
}
135+
}
123136
}
124137

125138
#[cfg(test)]

datafusion/src/physical_plan/explain.rs

+15-4
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,14 @@
2020
use std::any::Any;
2121
use std::sync::Arc;
2222

23-
use crate::error::{DataFusionError, Result};
2423
use crate::{
24+
error::{DataFusionError, Result},
2525
logical_plan::StringifiedPlan,
26-
physical_plan::{common::SizedRecordBatchStream, ExecutionPlan},
26+
physical_plan::Partitioning,
27+
physical_plan::{common::SizedRecordBatchStream, DisplayFormatType, ExecutionPlan},
2728
};
2829
use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch};
2930

30-
use crate::physical_plan::Partitioning;
31-
3231
use super::SendableRecordBatchStream;
3332
use async_trait::async_trait;
3433

@@ -122,4 +121,16 @@ impl ExecutionPlan for ExplainExec {
122121
vec![Arc::new(record_batch)],
123122
)))
124123
}
124+
125+
fn fmt_as(
126+
&self,
127+
t: DisplayFormatType,
128+
f: &mut std::fmt::Formatter,
129+
) -> std::fmt::Result {
130+
match t {
131+
DisplayFormatType::Default => {
132+
write!(f, "ExplainExec")
133+
}
134+
}
135+
}
125136
}

datafusion/src/physical_plan/expressions/average.rs

+4
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ impl AggregateExpr for Avg {
109109
fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
110110
vec![self.expr.clone()]
111111
}
112+
113+
fn name(&self) -> &str {
114+
&self.name
115+
}
112116
}
113117

114118
/// An accumulator to compute the average

datafusion/src/physical_plan/expressions/count.rs

+4
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ impl AggregateExpr for Count {
8383
fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
8484
Ok(Box::new(CountAccumulator::new()))
8585
}
86+
87+
fn name(&self) -> &str {
88+
&self.name
89+
}
8690
}
8791

8892
#[derive(Debug)]

datafusion/src/physical_plan/expressions/min_max.rs

+8
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ impl AggregateExpr for Max {
8888
fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
8989
Ok(Box::new(MaxAccumulator::try_new(&self.data_type)?))
9090
}
91+
92+
fn name(&self) -> &str {
93+
&self.name
94+
}
9195
}
9296

9397
// Statically-typed version of min/max(array) -> ScalarValue for string types.
@@ -387,6 +391,10 @@ impl AggregateExpr for Min {
387391
fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
388392
Ok(Box::new(MinAccumulator::try_new(&self.data_type)?))
389393
}
394+
395+
fn name(&self) -> &str {
396+
&self.name
397+
}
390398
}
391399

392400
#[derive(Debug)]

0 commit comments

Comments
 (0)