Skip to content

Commit

Permalink
Move subquery alias assignment onto rules (#4767)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Dec 30, 2022
1 parent f7477dc commit cf45eb9
Show file tree
Hide file tree
Showing 16 changed files with 277 additions and 246 deletions.
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q11.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Sort: value DESC NULLS FIRST
Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value
Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__sq_1.__value AS Decimal128(38, 15))
Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15))
CrossJoin:
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
Inner Join: supplier.s_nationkey = nation.n_nationkey
Expand All @@ -9,7 +9,7 @@ Sort: value DESC NULLS FIRST
TableScan: supplier projection=[s_suppkey, s_nationkey]
Filter: nation.n_name = Utf8("GERMANY")
TableScan: nation projection=[n_nationkey, n_name]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS __value
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
Inner Join: supplier.s_nationkey = nation.n_nationkey
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q15.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
EmptyRelation
Sort: supplier.s_suppkey ASC NULLS LAST
Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue
Inner Join: revenue0.total_revenue = __sq_1.__value
Inner Join: revenue0.total_revenue = __scalar_sq_1.__value
Inner Join: supplier.s_suppkey = revenue0.supplier_no
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone]
SubqueryAlias: revenue0
Expand All @@ -10,7 +10,7 @@ Sort: supplier.s_suppkey ASC NULLS LAST
Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587")
TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: MAX(revenue0.total_revenue) AS __value
Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]]
SubqueryAlias: revenue0
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q16.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type AS
Projection: group_alias_0 AS part.p_brand, group_alias_1 AS part.p_type, group_alias_2 AS part.p_size, COUNT(alias1) AS COUNT(DISTINCT partsupp.ps_suppkey)
Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]], aggr=[[COUNT(alias1)]]
Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]], aggr=[[]]
LeftAnti Join: partsupp.ps_suppkey = __sq_1.s_suppkey
LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey
Inner Join: partsupp.ps_partkey = part.p_partkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey]
Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])
TableScan: part projection=[p_partkey, p_brand, p_type, p_size]
SubqueryAlias: __sq_1
SubqueryAlias: __correlated_sq_1
Projection: supplier.s_suppkey AS s_suppkey
Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%")
TableScan: supplier projection=[s_suppkey, s_comment]
13 changes: 7 additions & 6 deletions benchmarks/expected-plans/q17.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
Projection: CAST(SUM(lineitem.l_extendedprice) AS Decimal128(38, 33)) / Decimal128(Some(7000000000000000195487369212723200),38,33) AS avg_yearly
Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly
Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]]
Filter: CAST(lineitem.l_quantity AS Decimal128(38, 21)) < __sq_1.__value
Inner Join: part.p_partkey = __sq_1.l_partkey
Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < CAST(__scalar_sq_1.__value AS Decimal128(30, 15))
Inner Join: part.p_partkey = __scalar_sq_1.l_partkey, lineitem.l_partkey = __scalar_sq_1.l_partkey
Inner Join: lineitem.l_partkey = part.p_partkey
TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX")
TableScan: part projection=[p_partkey, p_brand, p_container]
Projection: lineitem.l_partkey, Decimal128(Some(200000000000000000000),38,21) * CAST(AVG(lineitem.l_quantity) AS Decimal128(38, 21)) AS __value, alias=__sq_1
Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
SubqueryAlias: __scalar_sq_1
Projection: lineitem.l_partkey, Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS __value
Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
TableScan: lineitem projection=[l_partkey, l_quantity]
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q18.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST
Projection: customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, SUM(lineitem.l_quantity)
Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[SUM(lineitem.l_quantity)]]
LeftSemi Join: orders.o_orderkey = __sq_1.l_orderkey
LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey
Inner Join: orders.o_orderkey = lineitem.l_orderkey
Inner Join: customer.c_custkey = orders.o_custkey
TableScan: customer projection=[c_custkey, c_name]
TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate]
TableScan: lineitem projection=[l_orderkey, l_quantity]
SubqueryAlias: __sq_1
SubqueryAlias: __correlated_sq_1
Projection: lineitem.l_orderkey AS l_orderkey
Filter: SUM(lineitem.l_quantity) > Decimal128(Some(30000),25,2)
Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_quantity)]]
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
Inner Join: part.p_partkey = __sq_1.ps_partkey, partsupp.ps_supplycost = __sq_1.__value
Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value
Inner Join: nation.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
Expand All @@ -13,7 +13,7 @@ Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplie
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
Filter: region.r_name = Utf8("EUROPE")
TableScan: region projection=[r_regionkey, r_name]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
Inner Join: nation.n_regionkey = region.r_regionkey
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/expected-plans/q20.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
Sort: supplier.s_name ASC NULLS LAST
Projection: supplier.s_name, supplier.s_address
LeftSemi Join: supplier.s_suppkey = __sq_1.ps_suppkey
LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey]
Filter: nation.n_name = Utf8("CANADA")
TableScan: nation projection=[n_nationkey, n_name]
SubqueryAlias: __sq_1
SubqueryAlias: __correlated_sq_1
Projection: partsupp.ps_suppkey AS ps_suppkey
Filter: CAST(partsupp.ps_availqty AS Float64) > __sq_3.__value
Inner Join: partsupp.ps_partkey = __sq_3.l_partkey, partsupp.ps_suppkey = __sq_3.l_suppkey
LeftSemi Join: partsupp.ps_partkey = __sq_2.p_partkey
Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.__value
Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey
LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty]
SubqueryAlias: __sq_2
SubqueryAlias: __correlated_sq_2
Projection: part.p_partkey AS p_partkey
Filter: part.p_name LIKE Utf8("forest%")
TableScan: part projection=[p_partkey, p_name]
SubqueryAlias: __sq_3
SubqueryAlias: __scalar_sq_1
Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value
Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]]
Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131")
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q22.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ Sort: custsale.cntrycode ASC NULLS LAST
Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]]
SubqueryAlias: custsale
Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal
Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __sq_1.__value
Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.__value
CrossJoin:
LeftAnti Join: customer.c_custkey = orders.o_custkey
Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
TableScan: customer projection=[c_custkey, c_phone, c_acctbal]
TableScan: orders projection=[o_custkey]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: AVG(customer.c_acctbal) AS __value
Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]]
Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
Expand Down
12 changes: 1 addition & 11 deletions benchmarks/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,17 +594,7 @@ mod tests {
expected_plan(16).await
}

/// This query produces different plans depending on operating system. The difference is
/// due to re-writing the following expression:
///
/// `sum(l_extendedprice) / 7.0 as avg_yearly`
///
/// Linux: Decimal128(Some(7000000000000000195487369212723200),38,33)
/// Windows: Decimal128(Some(6999999999999999042565864605876224),38,33)
///
/// See https://github.com/apache/arrow-datafusion/issues/3791
#[tokio::test]
#[ignore]
#[tokio::test]
async fn q17_expected_plan() -> Result<()> {
expected_plan(17).await
}
Expand Down
Loading

0 comments on commit cf45eb9

Please sign in to comment.