From 5ef7723d730a4c62c90b2e47db20a7da41b5f445 Mon Sep 17 00:00:00 2001 From: James Cor Date: Wed, 24 Jan 2024 00:41:06 -0800 Subject: [PATCH] groupby and having getfield index fixes (#2281) --- enginetest/queries/integration_plans.go | 12 +- enginetest/queries/queries.go | 12 +- sql/planbuilder/aggregates.go | 61 +++- sql/planbuilder/parse_test.go | 446 ++++++++++++++++++++---- 4 files changed, 455 insertions(+), 76 deletions(-) diff --git a/enginetest/queries/integration_plans.go b/enginetest/queries/integration_plans.go index 611e52c1d4..4ff284da71 100644 --- a/enginetest/queries/integration_plans.go +++ b/enginetest/queries/integration_plans.go @@ -137,7 +137,7 @@ WHERE " │ ├─ outerVisibility: false\n" + " │ ├─ isLateral: false\n" + " │ ├─ cacheable: true\n" + - " │ ├─ colSet: (45-48)\n" + + " │ ├─ colSet: (44-47)\n" + " │ ├─ tableId: 4\n" + " │ └─ Filter\n" + " │ ├─ AND\n" + @@ -222,7 +222,7 @@ WHERE " └─ IndexedTableAccess(E2I7U)\n" + " ├─ index: [E2I7U.ZH72S]\n" + " ├─ keys: [cl3dt.ZH72S:0]\n" + - " ├─ colSet: (49-65)\n" + + " ├─ colSet: (48-64)\n" + " ├─ tableId: 5\n" + " └─ Table\n" + " ├─ name: E2I7U\n" + @@ -1224,7 +1224,7 @@ WHERE " │ ├─ outerVisibility: false\n" + " │ ├─ isLateral: false\n" + " │ ├─ cacheable: true\n" + - " │ ├─ colSet: (48-51)\n" + + " │ ├─ colSet: (47-50)\n" + " │ ├─ tableId: 4\n" + " │ └─ Filter\n" + " │ ├─ AND\n" + @@ -1309,7 +1309,7 @@ WHERE " └─ IndexedTableAccess(E2I7U)\n" + " ├─ index: [E2I7U.ZH72S]\n" + " ├─ keys: [cl3dt.ZH72S:0]\n" + - " ├─ colSet: (52-68)\n" + + " ├─ colSet: (51-67)\n" + " ├─ tableId: 5\n" + " └─ Table\n" + " ├─ name: E2I7U\n" + @@ -3124,7 +3124,7 @@ WHERE " │ ├─ outerVisibility: false\n" + " │ ├─ isLateral: false\n" + " │ ├─ cacheable: true\n" + - " │ ├─ colSet: (44-47)\n" + + " │ ├─ colSet: (43-46)\n" + " │ ├─ tableId: 4\n" + " │ └─ Filter\n" + " │ ├─ AND\n" + @@ -3209,7 +3209,7 @@ WHERE " └─ IndexedTableAccess(E2I7U)\n" + " ├─ index: [E2I7U.ZH72S]\n" + " ├─ keys: [cl3dt.ZH72S:0]\n" + - " ├─ colSet: (48-64)\n" + + " ├─ colSet: (47-63)\n" + " ├─ tableId: 5\n" + " └─ Table\n" + " ├─ name: E2I7U\n" + diff --git a/enginetest/queries/queries.go b/enginetest/queries/queries.go index 7e870ec246..8b71a69927 100644 --- a/enginetest/queries/queries.go +++ b/enginetest/queries/queries.go @@ -3496,9 +3496,19 @@ Select * from ( }, }, { - Query: `SELECT SUBSTRING_INDEX(mytable.s, "d", 1) AS s FROM mytable INNER JOIN othertable ON (SUBSTRING_INDEX(mytable.s, "d", 1) = SUBSTRING_INDEX(othertable.s2, "d", 1)) GROUP BY 1 HAVING s = 'secon'`, + Query: `SELECT SUBSTRING_INDEX(mytable.s, "d", 1) AS s FROM mytable INNER JOIN othertable ON (SUBSTRING_INDEX(mytable.s, "d", 1) = SUBSTRING_INDEX(othertable.s2, "d", 1)) GROUP BY 1 HAVING s = 'secon';`, Expected: []sql.Row{{"secon"}}, }, + { + Query: `SELECT SUBSTRING_INDEX(mytable.s, "d", 1) AS ss FROM mytable INNER JOIN othertable ON (SUBSTRING_INDEX(mytable.s, "d", 1) = SUBSTRING_INDEX(othertable.s2, "d", 1)) GROUP BY s HAVING s = 'secon';`, + Expected: []sql.Row{}, + }, + { + Query: `SELECT SUBSTRING_INDEX(mytable.s, "d", 1) AS ss FROM mytable INNER JOIN othertable ON (SUBSTRING_INDEX(mytable.s, "d", 1) = SUBSTRING_INDEX(othertable.s2, "d", 1)) GROUP BY ss HAVING ss = 'secon';`, + Expected: []sql.Row{ + {"secon"}, + }, + }, { Query: `SELECT TRIM(mytable.s) AS s FROM mytable`, Expected: []sql.Row{{"first row"}, {"second row"}, {"third row"}}, diff --git a/sql/planbuilder/aggregates.go b/sql/planbuilder/aggregates.go index ea69fa44a8..ae870404a1 100644 --- a/sql/planbuilder/aggregates.go +++ b/sql/planbuilder/aggregates.go @@ -311,6 +311,7 @@ func (b *Builder) buildAggregateFunc(inScope *scope, name string, e *ast.FuncExp switch e := e.(type) { case *expression.GetField: if e.TableId() == 0 { + // TODO: not sure where this came from but it's not true // aliases are not valid aggregate arguments, the alias must be masking a column gf := b.selectExprToExpression(inScope.parent, arg) var ok bool @@ -717,6 +718,7 @@ func (b *Builder) analyzeHaving(fromScope, projScope *scope, having *ast.Where) name := n.Name.Lowered() if isAggregateFunc(name) { // record aggregate + // TODO: this should get projScope as well _ = b.buildAggregateFunc(fromScope, name, n) } else if isWindowFunc(name) { _ = b.buildWindowFunc(fromScope, name, n, (*ast.WindowDef)(n.Over)) @@ -775,12 +777,69 @@ func (b *Builder) buildHaving(fromScope, projScope, outScope *scope, having *ast if fromScope.groupBy == nil { fromScope.initGroupBy() } - havingScope := fromScope.push() + // Having specifies conditions on groups. If not group by is present, all rows implicitly form a single aggregate group. + havingScope := fromScope.push() // TODO: we should not be including the entire fromScope + for _, c := range projScope.cols { + // if a projection overrides a column used in an aggregation, don't use projection + found := false + if fromScope.groupBy != nil && fromScope.groupBy.hasAggs() { + for _, cc := range fromScope.groupBy.aggregations() { + transform.InspectExpr(cc.scalar, func(e sql.Expression) bool { + switch e := e.(type) { + case *expression.GetField: + if strings.EqualFold(c.col, e.Name()) { + found = true + havingScope.addColumn(cc) + return true + } + default: + } + return false + }) + } + } + if found { + continue + } + + alias, isAlias := c.scalar.(*expression.Alias) + if !isAlias { + continue + } + + // Aliased GetFields are allowed in having clauses regardless of weather they are in the group by + _, isGetField := alias.Child.(*expression.GetField) + if isGetField { + havingScope.newColumn(c) + continue + } + + // Aliased expression is allowed if there is no group by (it is implicitly a single group) + if len(fromScope.groupBy.inCols) == 0 { + havingScope.newColumn(c) + continue + } + + // Aliased expressions are allowed in having clauses if they are in the group by + for _, cc := range fromScope.groupBy.inCols { + if c.col == cc.col { + havingScope.newColumn(c) + found = true + break + } + } + + if found { + continue + } + if c.tableId.IsEmpty() { havingScope.newColumn(c) + continue } } + havingScope.groupBy = fromScope.groupBy h := b.buildScalar(havingScope, having.Expr) outScope.node = plan.NewHaving(h, outScope.node) diff --git a/sql/planbuilder/parse_test.go b/sql/planbuilder/parse_test.go index 7b2170d2f3..875d56e11f 100644 --- a/sql/planbuilder/parse_test.go +++ b/sql/planbuilder/parse_test.go @@ -1273,17 +1273,7 @@ Project `, }, { - Query: ` - - - - - - - - - - select + Query: `select x, x*y, ROW_NUMBER() OVER(PARTITION BY x) AS row_num1, @@ -1311,17 +1301,7 @@ Project `, }, { - Query: ` - - - - - - - - - - select + Query: `select x+1 as x, sum(x) OVER(PARTITION BY y ORDER BY x) AS sum from xy @@ -1359,15 +1339,6 @@ Project }, { Query: ` - - - - - - - - - SELECT x, ROW_NUMBER() OVER w AS 'row_number', @@ -1612,17 +1583,7 @@ Project `, }, { - Query: ` - - - - - - - - - - SELECT x + Query: `SELECT x FROM xy WHERE EXISTS (SELECT count(u) AS count_1 FROM uv @@ -1663,17 +1624,7 @@ Project `, }, { - Query: ` - - - - - - - - - - WITH RECURSIVE + Query: `WITH RECURSIVE rt (foo) AS ( SELECT 1 as foo UNION ALL @@ -1879,16 +1830,7 @@ Project `, }, { - Query: ` - - - - - - - - -SELECT fi, COUNT(*) FROM ( + Query: `SELECT fi, COUNT(*) FROM ( SELECT tbl.x AS fi FROM xy tbl ) t @@ -2121,6 +2063,364 @@ Create table myTable ENFORCED `, }, + { + Query: "SELECT x as y FROM xy GROUP BY x HAVING AVG(-y) IS NOT NULL", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as y] + └─ Having + ├─ NOT + │ └─ avg(-xy.y):5 IS NULL + └─ Project + ├─ columns: [avg(-xy.y):5, xy.x:1!null, xy.x:1!null as y] + └─ GroupBy + ├─ select: AVG(-xy.y), xy.x:1!null + ├─ group: xy.x:1!null + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Query: "select x as xx from xy group by xx having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xx:5!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.x:1!null as xx] + └─ GroupBy + ├─ select: xy.x:1!null + ├─ group: xy.x:1!null as xx + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Query: "select x as xx from xy having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xx:5!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.y:2!null, xy.z:3!null, xy.x:1!null as xx] + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Query: "select x as xx from xy group by xx having x = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xy.x:1!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.x:1!null as xx] + └─ GroupBy + ├─ select: xy.x:1!null + ├─ group: xy.x:1!null as xx + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Query: "select x as xx from xy having x = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xy.x:1!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.y:2!null, xy.z:3!null, xy.x:1!null as xx] + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Query: "select x + 1 as xx from xy group by xx having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [(xy.x:1!null + 1 (tinyint)) as xx] + └─ Having + ├─ Eq + │ ├─ xx:5!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, (xy.x:1!null + 1 (tinyint)) as xx] + └─ GroupBy + ├─ select: xy.x:1!null + ├─ group: (xy.x:1!null + 1 (tinyint)) as xx + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Query: "select x + 1 as xx from xy having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [(xy.x:1!null + 1 (tinyint)) as xx] + └─ Having + ├─ Eq + │ ├─ xx:5!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.y:2!null, xy.z:3!null, (xy.x:1!null + 1 (tinyint)) as xx] + └─ Table + ├─ name: xy + ├─ columns: [x y z] + ├─ colSet: (1-3) + └─ tableId: 1 +`, + }, + { + Skip: true, + Query: "select x + 1 as xx from xy group by xx having x = 123; -- should error", + }, + { + Skip: true, + Query: "select x + 1 as xx from xy having x = 123; -- should error", + }, + { + Query: "select x as xx from xy join uv on (x = u) group by xx having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xx:8!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.x:1!null as xx] + └─ GroupBy + ├─ select: xy.x:1!null + ├─ group: xy.x:1!null as xx + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Query: "select x as xx from xy join uv on (x = u) having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xx:8!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.y:2!null, xy.z:3!null, uv.u:4!null, uv.v:5!null, uv.w:6!null, xy.x:1!null as xx] + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Query: "select x as xx from xy join uv on (x = u) group by xx having x = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xy.x:1!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.x:1!null as xx] + └─ GroupBy + ├─ select: xy.x:1!null + ├─ group: xy.x:1!null as xx + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Query: "select x as xx from xy join uv on (x = u) having x = 123;", + ExpectedPlan: ` +Project + ├─ columns: [xy.x:1!null as xx] + └─ Having + ├─ Eq + │ ├─ xy.x:1!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.y:2!null, xy.z:3!null, uv.u:4!null, uv.v:5!null, uv.w:6!null, xy.x:1!null as xx] + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Query: "select x + 1 as xx from xy join uv on (x = u) group by xx having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [(xy.x:1!null + 1 (tinyint)) as xx] + └─ Having + ├─ Eq + │ ├─ xx:8!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, (xy.x:1!null + 1 (tinyint)) as xx] + └─ GroupBy + ├─ select: xy.x:1!null + ├─ group: (xy.x:1!null + 1 (tinyint)) as xx + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Query: "select x + 1 as xx from xy join uv on (x = u) having xx = 123;", + ExpectedPlan: ` +Project + ├─ columns: [(xy.x:1!null + 1 (tinyint)) as xx] + └─ Having + ├─ Eq + │ ├─ xx:8!null + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [xy.x:1!null, xy.y:2!null, xy.z:3!null, uv.u:4!null, uv.v:5!null, uv.w:6!null, (xy.x:1!null + 1 (tinyint)) as xx] + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Skip: true, + Query: "select x + 1 as xx from xy join uv on (x = u) group by xx having x = 123; -- should error", + }, + { + Skip: true, + Query: "select x + 1 as xx from xy join uv on (x = u) having x = 123; -- should error", + }, + { + Query: "select x +1 as xx from xy join uv on (x = u) group by x having avg(x) = 123;", + ExpectedPlan: ` +Project + ├─ columns: [(xy.x:1!null + 1 (tinyint)) as xx] + └─ Having + ├─ Eq + │ ├─ avg(xy.x):8 + │ └─ 123 (tinyint) + └─ Project + ├─ columns: [avg(xy.x):8, xy.x:1!null, (xy.x:1!null + 1 (tinyint)) as xx] + └─ GroupBy + ├─ select: AVG(xy.x:1!null), xy.x:1!null + ├─ group: xy.x:1!null + └─ InnerJoin + ├─ Eq + │ ├─ xy.x:1!null + │ └─ uv.u:4!null + ├─ Table + │ ├─ name: xy + │ ├─ columns: [x y z] + │ ├─ colSet: (1-3) + │ └─ tableId: 1 + └─ Table + ├─ name: uv + ├─ columns: [u v w] + ├─ colSet: (4-6) + └─ tableId: 2 +`, + }, + { + Skip: true, + Query: "select x + 1 as xx from xy join uv on (x = u) group by xx having avg(xx) = 123;", + }, } var w *bufio.Writer @@ -2157,6 +2457,16 @@ Create table myTable for _, tt := range tests { t.Run(tt.Query, func(t *testing.T) { if tt.Skip { + if rewrite { + w.WriteString("\t{\n") + w.WriteString(fmt.Sprintf("\t\tSkip: true,\n")) + if strings.Contains(tt.Query, "\n") { + w.WriteString(fmt.Sprintf("\t\tQuery: `\n%s`,\n", tt.Query)) + } else { + w.WriteString(fmt.Sprintf("\t\tQuery: \"%s\",\n", tt.Query)) + } + w.WriteString("\t},\n") + } t.Skip() } stmt, err := sqlparser.Parse(tt.Query) @@ -2167,14 +2477,14 @@ Create table myTable plan := sql.DebugString(outScope.node) if rewrite { - w.WriteString(" {\n") + w.WriteString("\t{\n") if strings.Contains(tt.Query, "\n") { - w.WriteString(fmt.Sprintf(" Query: `\n%s`,\n", tt.Query)) + w.WriteString(fmt.Sprintf("\t\tQuery: `\n%s`,\n", tt.Query)) } else { - w.WriteString(fmt.Sprintf(" Query: \"%s\",\n", tt.Query)) + w.WriteString(fmt.Sprintf("\t\tQuery: \"%s\",\n", tt.Query)) } - w.WriteString(fmt.Sprintf(" ExpectedPlan: `\n%s`,\n", plan)) - w.WriteString(" },\n") + w.WriteString(fmt.Sprintf("\t\tExpectedPlan: `\n%s`,\n", plan)) + w.WriteString("\t},\n") } if verbose { print(plan)