Skip to content

Commit 1ba07d3

Browse files
y-f-uLordworms
authored andcommitted
fix: unparser generates wrong sql for derived table with columns (apache#17) (apache#11505)
* fix unparser for derived table with columns * refactoring * renaming * case in tests
1 parent 6fabeb0 commit 1ba07d3

File tree

2 files changed

+96
-10
lines changed

2 files changed

+96
-10
lines changed

datafusion/sql/src/unparser/plan.rs

+67-10
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, R
1919
use datafusion_expr::{
2020
expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, Projection,
2121
};
22-
use sqlparser::ast::{self, SetExpr};
22+
use sqlparser::ast::{self, Ident, SetExpr};
2323

2424
use crate::unparser::utils::unproject_agg_exprs;
2525

@@ -457,15 +457,11 @@ impl Unparser<'_> {
457457
}
458458
LogicalPlan::SubqueryAlias(plan_alias) => {
459459
// Handle bottom-up to allocate relation
460-
self.select_to_sql_recursively(
461-
plan_alias.input.as_ref(),
462-
query,
463-
select,
464-
relation,
465-
)?;
460+
let (plan, columns) = subquery_alias_inner_query_and_columns(plan_alias);
466461

462+
self.select_to_sql_recursively(plan, query, select, relation)?;
467463
relation.alias(Some(
468-
self.new_table_alias(plan_alias.alias.table().to_string()),
464+
self.new_table_alias(plan_alias.alias.table().to_string(), columns),
469465
));
470466

471467
Ok(())
@@ -599,10 +595,10 @@ impl Unparser<'_> {
599595
self.binary_op_to_sql(lhs, rhs, ast::BinaryOperator::And)
600596
}
601597

602-
fn new_table_alias(&self, alias: String) -> ast::TableAlias {
598+
fn new_table_alias(&self, alias: String, columns: Vec<Ident>) -> ast::TableAlias {
603599
ast::TableAlias {
604600
name: self.new_ident_quoted_if_needs(alias),
605-
columns: Vec::new(),
601+
columns,
606602
}
607603
}
608604

@@ -611,6 +607,67 @@ impl Unparser<'_> {
611607
}
612608
}
613609

610+
// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of
611+
// subquery
612+
// - `(SELECT column_a as a from table) AS A`
613+
// - `(SELECT column_a from table) AS A (a)`
614+
//
615+
// A roundtrip example for table alias with columns
616+
//
617+
// query: SELECT id FROM (SELECT j1_id from j1) AS c (id)
618+
//
619+
// LogicPlan:
620+
// Projection: c.id
621+
// SubqueryAlias: c
622+
// Projection: j1.j1_id AS id
623+
// Projection: j1.j1_id
624+
// TableScan: j1
625+
//
626+
// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS
627+
// id FROM (SELECT j1.j1_id FROM j1)) AS c`.
628+
// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table
629+
// `(SELECT j1.j1_id FROM j1)`
630+
//
631+
// With this logic, the unparsed query will be:
632+
// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)`
633+
//
634+
// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)`
635+
// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and
636+
// Column in the Projections. Once the parser side is fixed, this logic should work
637+
fn subquery_alias_inner_query_and_columns(
638+
subquery_alias: &datafusion_expr::SubqueryAlias,
639+
) -> (&LogicalPlan, Vec<Ident>) {
640+
let plan: &LogicalPlan = subquery_alias.input.as_ref();
641+
642+
let LogicalPlan::Projection(outer_projections) = plan else {
643+
return (plan, vec![]);
644+
};
645+
646+
// check if it's projection inside projection
647+
let LogicalPlan::Projection(inner_projection) = outer_projections.input.as_ref()
648+
else {
649+
return (plan, vec![]);
650+
};
651+
652+
let mut columns: Vec<Ident> = vec![];
653+
// check if the inner projection and outer projection have a matching pattern like
654+
// Projection: j1.j1_id AS id
655+
// Projection: j1.j1_id
656+
for (i, inner_expr) in inner_projection.expr.iter().enumerate() {
657+
let Expr::Alias(ref outer_alias) = &outer_projections.expr[i] else {
658+
return (plan, vec![]);
659+
};
660+
661+
if outer_alias.expr.as_ref() != inner_expr {
662+
return (plan, vec![]);
663+
};
664+
665+
columns.push(outer_alias.name.as_str().into());
666+
}
667+
668+
(outer_projections.input.as_ref(), columns)
669+
}
670+
614671
impl From<BuilderError> for DataFusionError {
615672
fn from(e: BuilderError) -> Self {
616673
DataFusionError::External(Box::new(e))

datafusion/sql/tests/cases/plan_to_sql.rs

+29
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,35 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
240240
parser_dialect: Box::new(GenericDialect {}),
241241
unparser_dialect: Box::new(UnparserDefaultDialect {}),
242242
},
243+
// more tests around subquery/derived table roundtrip
244+
TestStatementWithDialect {
245+
sql: "SELECT string_count FROM (
246+
SELECT
247+
j1_id,
248+
MIN(j2_string)
249+
FROM
250+
j1 LEFT OUTER JOIN j2 ON
251+
j1_id = j2_id
252+
GROUP BY
253+
j1_id
254+
) AS agg (id, string_count)
255+
",
256+
expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, MIN(j2.j2_string) FROM j1 LEFT JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#,
257+
parser_dialect: Box::new(GenericDialect {}),
258+
unparser_dialect: Box::new(UnparserDefaultDialect {}),
259+
},
260+
TestStatementWithDialect {
261+
sql: "SELECT id FROM (SELECT j1_id from j1) AS c (id)",
262+
expected: r#"SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)"#,
263+
parser_dialect: Box::new(GenericDialect {}),
264+
unparser_dialect: Box::new(UnparserDefaultDialect {}),
265+
},
266+
TestStatementWithDialect {
267+
sql: "SELECT id FROM (SELECT j1_id as id from j1) AS c",
268+
expected: r#"SELECT c.id FROM (SELECT j1.j1_id AS id FROM j1) AS c"#,
269+
parser_dialect: Box::new(GenericDialect {}),
270+
unparser_dialect: Box::new(UnparserDefaultDialect {}),
271+
},
243272
];
244273

245274
for query in tests {

0 commit comments

Comments
 (0)