From 73cc61f0d978be209b5ac2789d068052ab831a57 Mon Sep 17 00:00:00 2001 From: Justin Jaffray Date: Wed, 20 Mar 2019 12:08:15 -0400 Subject: [PATCH] opt: fix zigzag joins with IS condition This commit fixes a problem where the constraint logic could infer that a column was constant, but our method of determining what that constant value was was incomplete and so we ended up with incomplete information. We now correctly infer the value for IS expressions, and also skip over cases where this happens again so we degrade more gracefully (don't run a zigzag join, as opposed to panicking). A more correct fix here would extract the values directly from the constraints, but I'd like to get this fixed on master to unblock #34695. Release note: None --- pkg/sql/opt/exec/execbuilder/testdata/join | 31 ++++++++++++++++++++++ pkg/sql/opt/memo/expr_format.go | 4 +-- pkg/sql/opt/memo/extract.go | 11 +++++--- pkg/sql/opt/xform/custom_funcs.go | 14 ++++++++++ pkg/sql/opt/xform/testdata/rules/join | 13 +++++++++ pkg/testutils/lint/lint_test.go | 2 +- 6 files changed, 68 insertions(+), 7 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/join b/pkg/sql/opt/exec/execbuilder/testdata/join index 46424410853f..49d61c69d9fa 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/join +++ b/pkg/sql/opt/exec/execbuilder/testdata/join @@ -1449,6 +1449,37 @@ lookup-join · · · table zigzag@c_idx · fixedvals 1 column + +# Regression test for part of #34695. +statement ok +CREATE TABLE zigzag2 ( + a INT, + b INT, + c INT, + d INT, + UNIQUE INDEX a_b_idx(a, b), + INDEX c_idx(c) +) + +# Check a value which is equated to NULL. + +query TTT +EXPLAIN SELECT * FROM zigzag2 WHERE a = 1 AND b = 2 AND c IS NULL +---- +render · · + └── lookup-join · · + │ table zigzag2@primary + │ type inner + └── zigzag-join · · + │ type inner + │ pred ((@1 = 1) AND (@2 = 2)) AND (@4 IS NULL) + ├── scan · · + │ table zigzag2@a_b_idx + │ fixedvals 2 columns + └── scan · · +· table zigzag2@c_idx +· fixedvals 1 column + # Test that we can force a merge join. query TTT EXPLAIN SELECT * FROM onecolumn INNER MERGE JOIN twocolumn USING(x) diff --git a/pkg/sql/opt/memo/expr_format.go b/pkg/sql/opt/memo/expr_format.go index 2b94f7dfea0f..ff4e32612783 100644 --- a/pkg/sql/opt/memo/expr_format.go +++ b/pkg/sql/opt/memo/expr_format.go @@ -298,10 +298,10 @@ func (f *ExprFmtCtx) formatRelational(e RelExpr, tp treeprinter.Node) { // FixedVals is always going to be a ScalarListExpr, containing tuples, // containing one ScalarListExpr, containing ConstExprs. for i := range t.LeftFixedCols { - leftVals[i] = t.FixedVals[0].Child(0).Child(i).(*ConstExpr).Value + leftVals[i] = ExtractConstDatum(t.FixedVals[0].Child(0).Child(i)) } for i := range t.RightFixedCols { - rightVals[i] = t.FixedVals[1].Child(0).Child(i).(*ConstExpr).Value + rightVals[i] = ExtractConstDatum(t.FixedVals[1].Child(0).Child(i)) } tp.Childf("left fixed columns: %v = %v", t.LeftFixedCols, leftVals) tp.Childf("right fixed columns: %v = %v", t.RightFixedCols, rightVals) diff --git a/pkg/sql/opt/memo/extract.go b/pkg/sql/opt/memo/extract.go index 942b84724c2a..9af1f5544976 100644 --- a/pkg/sql/opt/memo/extract.go +++ b/pkg/sql/opt/memo/extract.go @@ -279,12 +279,15 @@ func ExtractValuesFromFilter(on FiltersExpr, cols opt.ColSet) map[opt.ColumnID]t // extractConstEquality extracts a column that's being equated to a constant // value if possible. func extractConstEquality(condition opt.ScalarExpr) (bool, int, tree.Datum) { - if eq, ok := condition.(*EqExpr); ok { + // TODO(justin): this is error-prone because this logic is different from the + // constraint logic. Extract these values directly from the constraints. + switch condition.(type) { + case *EqExpr, *IsExpr: // Only check the left side - the variable is always on the left side // due to the CommuteVar norm rule. - if leftVar, ok := eq.Left.(*VariableExpr); ok { - if CanExtractConstDatum(eq.Right) { - return true, int(leftVar.Col), ExtractConstDatum(eq.Right) + if leftVar, ok := condition.Child(0).(*VariableExpr); ok { + if CanExtractConstDatum(condition.Child(1)) { + return true, int(leftVar.Col), ExtractConstDatum(condition.Child(1)) } } } diff --git a/pkg/sql/opt/xform/custom_funcs.go b/pkg/sql/opt/xform/custom_funcs.go index 611d0a66107a..7ca282eae4ec 100644 --- a/pkg/sql/opt/xform/custom_funcs.go +++ b/pkg/sql/opt/xform/custom_funcs.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/ordering" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sem/types" "github.com/cockroachdb/cockroach/pkg/util" @@ -1062,6 +1063,19 @@ func (c *CustomFuncs) GenerateZigzagJoins( // Fixed values are represented as tuples consisting of the // fixed segment of that side's index. fixedValMap := memo.ExtractValuesFromFilter(filters, fixedCols) + + if len(fixedValMap) != fixedCols.Len() { + if util.RaceEnabled { + panic(pgerror.NewAssertionErrorf( + "we inferred constant columns whose value we couldn't extract", + )) + } + + // This is a bug, but we don't want to block queries from running because of it. + // TODO(justin): remove this when we fix extractConstEquality. + continue + } + leftFixedCols, leftVals, leftTypes := c.fixedColsForZigzag( iter.index, scanPrivate.Table, fixedValMap, ) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index beaba5879beb..a545937749b8 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -1422,6 +1422,19 @@ inner-join (zigzag pqr@q pqr@r) ├── q = 1 [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] └── r = 2 [type=bool, outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] +opt +SELECT q,r FROM pqr WHERE q = 1 AND r IS NULL +---- +inner-join (zigzag pqr@q pqr@r) + ├── columns: q:2(int!null) r:3(int) + ├── eq columns: [1] = [1] + ├── left fixed columns: [2] = [1] + ├── right fixed columns: [3] = [NULL] + ├── fd: ()-->(2,3) + └── filters + ├── q = 1 [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + └── r IS NULL [type=bool, outer=(3), constraints=(/3: [/NULL - /NULL]; tight), fd=()-->(3)] + memo SELECT q,r FROM pqr WHERE q = 1 AND r = 2 ---- diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go index dedbc0315fa7..3dabcc49cc37 100644 --- a/pkg/testutils/lint/lint_test.go +++ b/pkg/testutils/lint/lint_test.go @@ -414,7 +414,7 @@ func TestLint(t *testing.T) { "git", "grep", "-nE", - `[^[:alnum:]]panic\(("|[a-z]+Error\{errors\.(New|Errorf)|fmt\.Errorf)`, + fmt.Sprintf(`[^[:alnum:]]panic\((%s|"|[a-z]+Error\{errors\.(New|Errorf)|fmt\.Errorf)`, "`"), "--", "sql/opt", ":!sql/opt/optgen",