-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MINOR: Partial fix for SQL aggregate queries with aliases #2464
Changes from all commits
9011865
d27559c
d84196d
985ad76
0a3d3a9
6e697d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ use crate::{ | |
error::{DataFusionError, Result}, | ||
logical_plan::{Column, ExpressionVisitor, Recursion}, | ||
}; | ||
use datafusion_expr::expr::find_columns_referenced_by_expr; | ||
use std::collections::HashMap; | ||
|
||
/// Collect all deeply nested `Expr::AggregateFunction` and | ||
|
@@ -58,9 +59,13 @@ pub(crate) fn find_window_exprs(exprs: &[Expr]) -> Vec<Expr> { | |
} | ||
|
||
/// Collect all deeply nested `Expr::Column`'s. They are returned in order of | ||
/// appearance (depth first), with duplicates omitted. | ||
/// appearance (depth first), and may contain duplicates. | ||
pub(crate) fn find_column_exprs(exprs: &[Expr]) -> Vec<Expr> { | ||
find_exprs_in_exprs(exprs, &|nested_expr| matches!(nested_expr, Expr::Column(_))) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code was only finding some of the expressions; it was not recursing, so it did not find them all. |
||
exprs | ||
.iter() | ||
.flat_map(find_columns_referenced_by_expr) | ||
.map(Expr::Column) | ||
.collect() | ||
} | ||
|
||
/// Search the provided `Expr`'s, and all of their nested `Expr`, for any that | ||
|
@@ -137,8 +142,16 @@ where | |
/// Convert any `Expr` to an `Expr::Column`. | ||
pub(crate) fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> { | ||
match expr { | ||
Expr::Column(_) => Ok(expr.clone()), | ||
_ => Ok(Expr::Column(Column::from_name(expr.name(plan.schema())?))), | ||
Expr::Column(col) => { | ||
let field = plan.schema().field_from_column(col)?; | ||
Ok(Expr::Column(field.qualified_column())) | ||
} | ||
_ => { | ||
// we should not be trying to create a name for the expression | ||
// based on the input schema but this is the current behavior | ||
// see https://github.com/apache/arrow-datafusion/issues/2456 | ||
Ok(Expr::Column(Column::from_name(expr.name(plan.schema())?))) | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -232,6 +232,27 @@ async fn csv_query_group_by_avg() -> Result<()> { | |
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn csv_query_group_by_with_aliases() -> Result<()> { | ||
let ctx = SessionContext::new(); | ||
register_aggregate_csv(&ctx).await?; | ||
let sql = "SELECT c1 AS c12, avg(c12) AS c1 FROM aggregate_test_100 GROUP BY c1"; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This originally failed with [error output missing from this capture] |
||
let actual = execute_to_batches(&ctx, sql).await; | ||
let expected = vec![ | ||
"+-----+---------------------+", | ||
"| c12 | c1 |", | ||
"+-----+---------------------+", | ||
"| a | 0.48754517466109415 |", | ||
"| b | 0.41040709263815384 |", | ||
"| c | 0.6600456536439784 |", | ||
"| d | 0.48855379387549824 |", | ||
"| e | 0.48600669271341534 |", | ||
"+-----+---------------------+", | ||
]; | ||
assert_batches_sorted_eq!(expected, &actual); | ||
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn csv_query_group_by_int_count() -> Result<()> { | ||
let ctx = SessionContext::new(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -251,6 +251,77 @@ pub enum Expr { | |
QualifiedWildcard { qualifier: String }, | ||
} | ||
|
||
/// Recursively find all columns referenced by an expression | ||
pub fn find_columns_referenced_by_expr(e: &Expr) -> Vec<Column> { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this code could use an ExprVisitor to avoid having to do the recursion itself (and potentially missing some cases). I took a crack at doing so in #2471. |
||
match e { | ||
Expr::Alias(expr, _) | ||
| Expr::Negative(expr) | ||
| Expr::Cast { expr, .. } | ||
| Expr::TryCast { expr, .. } | ||
| Expr::Sort { expr, .. } | ||
| Expr::InList { expr, .. } | ||
| Expr::InSubquery { expr, .. } | ||
| Expr::GetIndexedField { expr, .. } | ||
| Expr::Not(expr) | ||
| Expr::IsNotNull(expr) | ||
| Expr::IsNull(expr) => find_columns_referenced_by_expr(expr), | ||
Expr::Column(c) => vec![c.clone()], | ||
Expr::BinaryExpr { left, right, .. } => { | ||
let mut cols = vec![]; | ||
cols.extend(find_columns_referenced_by_expr(left.as_ref())); | ||
cols.extend(find_columns_referenced_by_expr(right.as_ref())); | ||
cols | ||
} | ||
Expr::Case { | ||
expr, | ||
when_then_expr, | ||
else_expr, | ||
} => { | ||
let mut cols = vec![]; | ||
if let Some(expr) = expr { | ||
cols.extend(find_columns_referenced_by_expr(expr.as_ref())); | ||
} | ||
for (w, t) in when_then_expr { | ||
cols.extend(find_columns_referenced_by_expr(w.as_ref())); | ||
cols.extend(find_columns_referenced_by_expr(t.as_ref())); | ||
} | ||
if let Some(else_expr) = else_expr { | ||
cols.extend(find_columns_referenced_by_expr(else_expr.as_ref())); | ||
} | ||
cols | ||
} | ||
Expr::ScalarFunction { args, .. } => args | ||
.iter() | ||
.flat_map(find_columns_referenced_by_expr) | ||
.collect(), | ||
Expr::AggregateFunction { args, .. } => args | ||
.iter() | ||
.flat_map(find_columns_referenced_by_expr) | ||
.collect(), | ||
Expr::ScalarVariable(_, _) | ||
| Expr::Exists { .. } | ||
| Expr::Wildcard | ||
| Expr::QualifiedWildcard { .. } | ||
| Expr::ScalarSubquery(_) | ||
| Expr::Literal(_) => vec![], | ||
Expr::Between { | ||
expr, low, high, .. | ||
} => { | ||
let mut cols = vec![]; | ||
cols.extend(find_columns_referenced_by_expr(expr.as_ref())); | ||
cols.extend(find_columns_referenced_by_expr(low.as_ref())); | ||
cols.extend(find_columns_referenced_by_expr(high.as_ref())); | ||
cols | ||
} | ||
Expr::ScalarUDF { args, .. } | ||
| Expr::WindowFunction { args, .. } | ||
| Expr::AggregateUDF { args, .. } => args | ||
.iter() | ||
.flat_map(find_columns_referenced_by_expr) | ||
.collect(), | ||
} | ||
} | ||
|
||
/// Fixed seed for the hashing so that Ords are consistent across runs | ||
const SEED: ahash::RandomState = ahash::RandomState::with_seeds(0, 0, 0, 0); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the first part of the fix