From efe04371456ae342d0dba6108d3a404a0b59c913 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 30 Aug 2018 16:02:13 +0800 Subject: [PATCH 01/15] ranger: merge Eq and In expressions if possible --- util/ranger/detacher.go | 69 ++++++++++++++++++++++++++++++++++------- util/ranger/ranger.go | 29 +++++++++++++++++ 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index ab3ac5686d0e6..f4775761d67c2 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -88,10 +88,9 @@ func detachColumnDNFConditions(sctx sessionctx.Context, conditions []expression. return accessConditions, hasResidualConditions } -// getEqOrInColOffset checks if the expression is a eq function that one side is constant and another is column or an -// in function which is `column in (constant list)`. +// getEqColOffset checks if the expression is a eq function that one side is constant and another is column. // If so, it will return the offset of this column in the slice, otherwise return -1 for not found. -func getEqOrInColOffset(expr expression.Expression, cols []*expression.Column) int { +func getEqColOffset(expr expression.Expression, cols []*expression.Column) int { f, ok := expr.(*expression.ScalarFunction) if !ok { return -1 @@ -116,6 +115,16 @@ func getEqOrInColOffset(expr expression.Expression, cols []*expression.Column) i } } } + return -1 +} + +// getInColOffset checks if the expression is an in function which is `column in (constant list)`. +// If so, it will return the offset of this column in the slice, otherwise return -1 for not found. +func getInColOffset(expr expression.Expression, cols []*expression.Column) int { + f, ok := expr.(*expression.ScalarFunction) + if !ok { + return -1 + } if f.FuncName.L == ast.In { c, ok := f.GetArgs()[0].(*expression.Column) if !ok { @@ -146,7 +155,7 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex err error ) - accessConds, filterConds := extractEqAndInCondition(conditions, cols, lengths) + accessConds, filterConds, conditions := extractEqAndInCondition(sctx, conditions, cols, lengths) for ; eqCount < len(accessConds); eqCount++ { if accessConds[eqCount].(*expression.ScalarFunction).FuncName.L != ast.EQ { @@ -187,14 +196,50 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex return ranges, accessConds, filterConds, eqCount, errors.Trace(err) } -func extractEqAndInCondition(conditions []expression.Expression, cols []*expression.Column, - lengths []int) (accesses, filters []expression.Expression) { - accesses = make([]expression.Expression, len(cols)) - for _, cond := range conditions { - offset := getEqOrInColOffset(cond, cols) - if offset != -1 { - accesses[offset] = cond +func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, + cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression) { + rb := builder{sc: sctx.GetSessionVars().StmtCtx} + accesses := make([]expression.Expression, len(cols)) + filters := make([]expression.Expression, len(cols)) + points := make([][]point, len(cols)) + mergedAccesses := make([]expression.Expression, len(cols)) + condOffset := make([]int, len(cols)) + //should not use range to iterate conditions, because we would delete items while iterating + for i := len(conditions) - 1; i >= 0; i-- { + eqOffset := getEqColOffset(conditions[i], cols) + inOffset := getInColOffset(conditions[i], cols) + offset := eqOffset + if eqOffset == -1 { + if inOffset == -1 { + continue + } + offset = inOffset + } + if accesses[offset] == nil { + accesses[offset] = conditions[i] + condOffset[offset] = i + continue + } + //multiple Eq/In conditions for one column in CNF, apply intersection on them + //lazily compute the points for the first visited Eq/In + if mergedAccesses[offset] == nil { + mergedAccesses[offset] = accesses[offset] + conditions = append(conditions[:condOffset[offset]], conditions[condOffset[offset]+1:]...) + points[offset] = rb.build(accesses[offset]) + } + points[offset] = rb.intersection(points[offset], rb.build(conditions[i])) + if inOffset == -1 { + mergedAccesses[offset] = conditions[i] + } + conditions = append(conditions[:i], conditions[i+1:]...) + } + for i, ma := range mergedAccesses { + if ma == nil { + continue } + accesses[i] = points2EqOrInCond(sctx, points[i], mergedAccesses[i]) + //XXX check nil + conditions = append(conditions, accesses[i]) } for i, cond := range accesses { if cond == nil { @@ -205,7 +250,7 @@ func extractEqAndInCondition(conditions []expression.Expression, cols []*express filters = append(filters, cond) } } - return accesses, filters + return accesses, filters, conditions } // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF. diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 3f35efd83ee1f..5cef2e26e1aaf 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/mysql" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/charset" @@ -466,3 +467,31 @@ func newFieldType(tp *types.FieldType) *types.FieldType { return tp } } + +func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression { + if len(points) == 0 { + return nil + } + sf, _ := expr.(*expression.ScalarFunction) + //Constant and Column args should have same RetType, simply get from first arg + retType := sf.GetArgs()[0].GetType() + values := make([]expression.Expression, 0, len(points)/2) + if sf.FuncName.L == ast.EQ { + if c, ok := sf.GetArgs()[0].(*expression.Column); ok { + values = append(values, c) + } else if c, ok := sf.GetArgs()[1].(*expression.Column); ok { + values = append(values, c) + } + } else { + values = append(values, sf.GetArgs()[0]) + } + for i := 0; i < len(points); i = i + 2 { + value := &expression.Constant{ + Value: points[i].value, + RetType: retType, + } + values = append(values, value) + } + f := expression.NewFunctionInternal(ctx, sf.FuncName.L, sf.GetType(), values...) + return f +} From 52f0b7bc10f6c3fcec5ae2c52e259f4054b41109 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 30 Aug 2018 19:17:47 +0800 Subject: [PATCH 02/15] fix filterConds bug --- util/ranger/detacher.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index f4775761d67c2..29ad43fd95454 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -198,9 +198,9 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression) { + var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} accesses := make([]expression.Expression, len(cols)) - filters := make([]expression.Expression, len(cols)) points := make([][]point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) condOffset := make([]int, len(cols)) @@ -228,6 +228,7 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex points[offset] = rb.build(accesses[offset]) } points[offset] = rb.intersection(points[offset], rb.build(conditions[i])) + //XXX quick termination if len(points[offset]) == 0 if inOffset == -1 { mergedAccesses[offset] = conditions[i] } From d70c96b0af10b63ec77bbf403a79860b9df6adce Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Fri, 31 Aug 2018 00:27:35 +0800 Subject: [PATCH 03/15] build empty range and force index scan if eq/in leads to false expression --- plan/find_best_task.go | 3 ++- util/ranger/detacher.go | 15 ++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/plan/find_best_task.go b/plan/find_best_task.go index 58c4e2d37a009..81113718e04b9 100644 --- a/plan/find_best_task.go +++ b/plan/find_best_task.go @@ -263,10 +263,11 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (t task, err error) { continue } // We will use index to generate physical plan if: + // this path's range is nil or // this path's access cond is not nil or // we have prop to match or // this index is forced to choose. - if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { + if len(path.ranges) == 0 || len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { idxTask, err := ds.convertToIndexScan(prop, path) if err != nil { return nil, errors.Trace(err) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 29ad43fd95454..52ed4419b8cbc 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -155,7 +155,10 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex err error ) - accessConds, filterConds, conditions := extractEqAndInCondition(sctx, conditions, cols, lengths) + accessConds, filterConds, conditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths) + if emptyRange { + return ranges, nil, nil, 0, nil + } for ; eqCount < len(accessConds); eqCount++ { if accessConds[eqCount].(*expression.ScalarFunction).FuncName.L != ast.EQ { @@ -197,7 +200,7 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex } func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, - cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression) { + cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) { var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} accesses := make([]expression.Expression, len(cols)) @@ -228,7 +231,10 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex points[offset] = rb.build(accesses[offset]) } points[offset] = rb.intersection(points[offset], rb.build(conditions[i])) - //XXX quick termination if len(points[offset]) == 0 + //early termination if false expression found + if len(points[offset]) == 0 { + return nil, nil, nil, true + } if inOffset == -1 { mergedAccesses[offset] = conditions[i] } @@ -239,7 +245,6 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex continue } accesses[i] = points2EqOrInCond(sctx, points[i], mergedAccesses[i]) - //XXX check nil conditions = append(conditions, accesses[i]) } for i, cond := range accesses { @@ -251,7 +256,7 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex filters = append(filters, cond) } } - return accesses, filters, conditions + return accesses, filters, conditions, false } // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF. From 9e2ac09bbc0087d8425c5b573b4664914558eeb8 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sat, 1 Sep 2018 14:46:59 +0800 Subject: [PATCH 04/15] revert split of function offset check the len(values) to decide whether EQ or In --- util/ranger/detacher.go | 29 ++++++----------------------- util/ranger/ranger.go | 6 +++++- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 52ed4419b8cbc..83d6bed8bb3de 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -88,9 +88,10 @@ func detachColumnDNFConditions(sctx sessionctx.Context, conditions []expression. return accessConditions, hasResidualConditions } -// getEqColOffset checks if the expression is a eq function that one side is constant and another is column. +// getEqOrInColOffset checks if the expression is a eq function that one side is constant and another is column or an +// in function which is `column in (constant list)`. // If so, it will return the offset of this column in the slice, otherwise return -1 for not found. -func getEqColOffset(expr expression.Expression, cols []*expression.Column) int { +func getEqOrInColOffset(expr expression.Expression, cols []*expression.Column) int { f, ok := expr.(*expression.ScalarFunction) if !ok { return -1 @@ -115,16 +116,6 @@ func getEqColOffset(expr expression.Expression, cols []*expression.Column) int { } } } - return -1 -} - -// getInColOffset checks if the expression is an in function which is `column in (constant list)`. -// If so, it will return the offset of this column in the slice, otherwise return -1 for not found. -func getInColOffset(expr expression.Expression, cols []*expression.Column) int { - f, ok := expr.(*expression.ScalarFunction) - if !ok { - return -1 - } if f.FuncName.L == ast.In { c, ok := f.GetArgs()[0].(*expression.Column) if !ok { @@ -209,14 +200,9 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex condOffset := make([]int, len(cols)) //should not use range to iterate conditions, because we would delete items while iterating for i := len(conditions) - 1; i >= 0; i-- { - eqOffset := getEqColOffset(conditions[i], cols) - inOffset := getInColOffset(conditions[i], cols) - offset := eqOffset - if eqOffset == -1 { - if inOffset == -1 { - continue - } - offset = inOffset + offset := getEqOrInColOffset(conditions[i], cols) + if offset == -1 { + continue } if accesses[offset] == nil { accesses[offset] = conditions[i] @@ -235,9 +221,6 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex if len(points[offset]) == 0 { return nil, nil, nil, true } - if inOffset == -1 { - mergedAccesses[offset] = conditions[i] - } conditions = append(conditions[:i], conditions[i+1:]...) } for i, ma := range mergedAccesses { diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 5cef2e26e1aaf..7fb4a17ba92a2 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -492,6 +492,10 @@ func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.E } values = append(values, value) } - f := expression.NewFunctionInternal(ctx, sf.FuncName.L, sf.GetType(), values...) + funcName := ast.EQ + if len(values) > 2 { + funcName = ast.In + } + f := expression.NewFunctionInternal(ctx, funcName, sf.GetType(), values...) return f } From 1efbd860b40e0d965541bf1da8d872a4ff5f9485 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sat, 1 Sep 2018 16:24:25 +0800 Subject: [PATCH 05/15] fix memory bug, interfere with table path tableFilters --- util/ranger/detacher.go | 31 ++++++++++++++++--------------- util/ranger/ranger.go | 4 +--- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 83d6bed8bb3de..398c531a4aeef 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -146,7 +146,7 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex err error ) - accessConds, filterConds, conditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths) + accessConds, filterConds, newConditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths) if emptyRange { return ranges, nil, nil, 0, nil } @@ -157,11 +157,11 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex } } // We should remove all accessConds, so that they will not be added to filter conditions. - conditions = removeAccessConditions(conditions, accessConds) + newConditions = removeAccessConditions(newConditions, accessConds) eqOrInCount := len(accessConds) if eqOrInCount == len(cols) { // If curIndex equals to len of index columns, it means the rest conditions haven't been appended to filter conditions. - filterConds = append(filterConds, conditions...) + filterConds = append(filterConds, newConditions...) ranges, err = buildCNFIndexRange(sctx.GetSessionVars().StmtCtx, cols, tpSlice, lengths, eqOrInCount, accessConds) if err != nil { return nil, nil, nil, 0, errors.Trace(err) @@ -174,11 +174,11 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex shouldReserve: lengths[eqOrInCount] != types.UnspecifiedLength, } if considerDNF { - accesses, filters := detachColumnCNFConditions(sctx, conditions, checker) + accesses, filters := detachColumnCNFConditions(sctx, newConditions, checker) accessConds = append(accessConds, accesses...) filterConds = append(filterConds, filters...) } else { - for _, cond := range conditions { + for _, cond := range newConditions { if !checker.check(cond) { filterConds = append(filterConds, cond) continue @@ -197,38 +197,39 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex accesses := make([]expression.Expression, len(cols)) points := make([][]point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) - condOffset := make([]int, len(cols)) + newConditions := make([]expression.Expression, 0, len(conditions)) //should not use range to iterate conditions, because we would delete items while iterating - for i := len(conditions) - 1; i >= 0; i-- { - offset := getEqOrInColOffset(conditions[i], cols) + for _, cond := range conditions { + offset := getEqOrInColOffset(cond, cols) if offset == -1 { + newConditions = append(newConditions, cond) continue } if accesses[offset] == nil { - accesses[offset] = conditions[i] - condOffset[offset] = i + accesses[offset] = cond continue } //multiple Eq/In conditions for one column in CNF, apply intersection on them //lazily compute the points for the first visited Eq/In if mergedAccesses[offset] == nil { mergedAccesses[offset] = accesses[offset] - conditions = append(conditions[:condOffset[offset]], conditions[condOffset[offset]+1:]...) points[offset] = rb.build(accesses[offset]) } - points[offset] = rb.intersection(points[offset], rb.build(conditions[i])) + points[offset] = rb.intersection(points[offset], rb.build(cond)) //early termination if false expression found if len(points[offset]) == 0 { return nil, nil, nil, true } - conditions = append(conditions[:i], conditions[i+1:]...) } for i, ma := range mergedAccesses { if ma == nil { + if accesses[i] != nil { + newConditions = append(newConditions, accesses[i]) + } continue } accesses[i] = points2EqOrInCond(sctx, points[i], mergedAccesses[i]) - conditions = append(conditions, accesses[i]) + newConditions = append(newConditions, accesses[i]) } for i, cond := range accesses { if cond == nil { @@ -239,7 +240,7 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex filters = append(filters, cond) } } - return accesses, filters, conditions, false + return accesses, filters, newConditions, false } // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF. diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 7fb4a17ba92a2..84cfdbd9c94ec 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -469,9 +469,7 @@ func newFieldType(tp *types.FieldType) *types.FieldType { } func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression { - if len(points) == 0 { - return nil - } + //len(points) cannot be 0 now, we have early termination in extractEqAndInCondition sf, _ := expr.(*expression.ScalarFunction) //Constant and Column args should have same RetType, simply get from first arg retType := sf.GetArgs()[0].GetType() From 0c3460a791966b76cd011521ed6581bd150df800 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sat, 1 Sep 2018 16:39:07 +0800 Subject: [PATCH 06/15] add explain test --- cmd/explaintest/r/explain_easy.result | 23 +++++++++++++++++++++++ cmd/explaintest/t/explain_easy.test | 8 ++++++++ 2 files changed, 31 insertions(+) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index ff0e9159abeaa..a92af461367ca 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -327,3 +327,26 @@ id count task operator info Projection_3 10000.00 root 0 └─TableReader_5 10000.00 root data:TableScan_4 └─TableScan_4 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo +drop table if exists t; +create table t(a bigint, b bigint, index idx(a, b)); +explain select * from t where a in (1, 2) and a in (1, 3); +id count task operator info +IndexReader_9 10.00 root index:IndexScan_8 +└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo +explain select * from t where b in (1, 2) and b in (1, 3); +id count task operator info +TableReader_7 10.00 root data:Selection_6 +└─Selection_6 10.00 cop in(test.t.b, 1, 2), in(test.t.b, 1, 3) + └─TableScan_5 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo +explain select * from t where a = 1 and a = 1; +id count task operator info +IndexReader_9 10.00 root index:IndexScan_8 +└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo +explain select * from t where a = 1 and a = 2; +id count task operator info +IndexReader_9 0.00 root index:IndexScan_8 +└─IndexScan_8 0.00 cop table:t, index:a, b, keep order:false, stats:pseudo +explain select * from t where b = 1 and b = 2; +id count task operator info +IndexReader_9 0.00 root index:IndexScan_8 +└─IndexScan_8 0.00 cop table:t, index:a, b, keep order:false, stats:pseudo diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index 0254c755322de..b6e67ed2c245c 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -58,3 +58,11 @@ explain select t.a = '123455' from t; explain select t.a > '123455' from t; explain select t.a != '123455' from t; explain select t.a = 12345678912345678998789678687678.111 from t; + +drop table if exists t; +create table t(a bigint, b bigint, index idx(a, b)); +explain select * from t where a in (1, 2) and a in (1, 3); +explain select * from t where b in (1, 2) and b in (1, 3); +explain select * from t where a = 1 and a = 1; +explain select * from t where a = 1 and a = 2; +explain select * from t where b = 1 and b = 2; From c027370e5b5c870afabb78f2567b0404e423a953 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sat, 1 Sep 2018 16:52:16 +0800 Subject: [PATCH 07/15] adjust comments --- util/ranger/detacher.go | 3 +-- util/ranger/ranger.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 398c531a4aeef..7659308b3d644 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -198,7 +198,6 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex points := make([][]point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) newConditions := make([]expression.Expression, 0, len(conditions)) - //should not use range to iterate conditions, because we would delete items while iterating for _, cond := range conditions { offset := getEqOrInColOffset(cond, cols) if offset == -1 { @@ -210,7 +209,7 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex continue } //multiple Eq/In conditions for one column in CNF, apply intersection on them - //lazily compute the points for the first visited Eq/In + //lazily compute the points for the previously visited Eq/In if mergedAccesses[offset] == nil { mergedAccesses[offset] = accesses[offset] points[offset] = rb.build(accesses[offset]) diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 84cfdbd9c94ec..59f958595ce6d 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -469,7 +469,7 @@ func newFieldType(tp *types.FieldType) *types.FieldType { } func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression { - //len(points) cannot be 0 now, we have early termination in extractEqAndInCondition + //len(points) cannot be 0 here, since we impose early termination in extractEqAndInCondition sf, _ := expr.(*expression.ScalarFunction) //Constant and Column args should have same RetType, simply get from first arg retType := sf.GetArgs()[0].GetType() From 7b2cf0f619770935e6e59a049f33d6b6e530aa6c Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sat, 1 Sep 2018 21:35:11 +0800 Subject: [PATCH 08/15] convert empty range scan to TableDual --- cmd/explaintest/r/explain_easy.result | 11 +++++++---- cmd/explaintest/t/explain_easy.test | 4 ++++ plan/find_best_task.go | 11 +++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index a92af461367ca..24ea69168bd67 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -344,9 +344,12 @@ IndexReader_9 10.00 root index:IndexScan_8 └─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo explain select * from t where a = 1 and a = 2; id count task operator info -IndexReader_9 0.00 root index:IndexScan_8 -└─IndexScan_8 0.00 cop table:t, index:a, b, keep order:false, stats:pseudo +TableDual_8 0.00 root rows:0 explain select * from t where b = 1 and b = 2; id count task operator info -IndexReader_9 0.00 root index:IndexScan_8 -└─IndexScan_8 0.00 cop table:t, index:a, b, keep order:false, stats:pseudo +TableDual_8 0.00 root rows:0 +drop table if exists t; +create table t(a bigint primary key); +explain select * from t where a = 1 and a = 2; +id count task operator info +TableDual_5 10000.00 root rows:0 diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index b6e67ed2c245c..54ddf73009f36 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -66,3 +66,7 @@ explain select * from t where b in (1, 2) and b in (1, 3); explain select * from t where a = 1 and a = 1; explain select * from t where a = 1 and a = 2; explain select * from t where b = 1 and b = 2; + +drop table if exists t; +create table t(a bigint primary key); +explain select * from t where a = 1 and a = 2; diff --git a/plan/find_best_task.go b/plan/find_best_task.go index 81113718e04b9..4b7796cabb071 100644 --- a/plan/find_best_task.go +++ b/plan/find_best_task.go @@ -252,6 +252,14 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (t task, err error) { t = invalidTask for _, path := range ds.possibleAccessPaths { + // if we already know the range of the scan is empty, just return a TableDual + if len(path.ranges) == 0 { + dual := PhysicalTableDual{}.init(ds.ctx, ds.stats) + dual.SetSchema(ds.schema) + return &rootTask{ + p: dual, + }, nil + } if path.isTablePath { tblTask, err := ds.convertToTableScan(prop, path) if err != nil { @@ -263,11 +271,10 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (t task, err error) { continue } // We will use index to generate physical plan if: - // this path's range is nil or // this path's access cond is not nil or // we have prop to match or // this index is forced to choose. - if len(path.ranges) == 0 || len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { + if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { idxTask, err := ds.convertToIndexScan(prop, path) if err != nil { return nil, errors.Trace(err) From 0a603d17e102f21bf8bf3a9f67be899ab533f191 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sun, 2 Sep 2018 01:24:16 +0800 Subject: [PATCH 09/15] TableDual is reused in plan cache --- plan/find_best_task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plan/find_best_task.go b/plan/find_best_task.go index 4b7796cabb071..30f5721a17e8b 100644 --- a/plan/find_best_task.go +++ b/plan/find_best_task.go @@ -253,7 +253,7 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (t task, err error) { for _, path := range ds.possibleAccessPaths { // if we already know the range of the scan is empty, just return a TableDual - if len(path.ranges) == 0 { + if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { dual := PhysicalTableDual{}.init(ds.ctx, ds.stats) dual.SetSchema(ds.schema) return &rootTask{ From b1c1b24c5d4a75abeb9fd44a476042a6cfd29c4b Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sun, 2 Sep 2018 11:39:57 +0800 Subject: [PATCH 10/15] modify unit test expected best plan, because empty range would be converted to TableDual now --- plan/physical_plan_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plan/physical_plan_test.go b/plan/physical_plan_test.go index 189ccc23fab72..8b76826f020e7 100644 --- a/plan/physical_plan_test.go +++ b/plan/physical_plan_test.go @@ -72,7 +72,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSimpleCase(c *C) { }, { sql: "select * from t where (t.c > 0 and t.c < 1) or (t.c > 2 and t.c < 3) or (t.c > 4 and t.c < 5) or (t.c > 6 and t.c < 7) or (t.c > 9 and t.c < 10)", - best: "IndexLookUp(Index(t.c_d_e)[], Table(t))", + best: "Dual", }, // Test TopN to table branch in double read. { @@ -87,7 +87,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSimpleCase(c *C) { // Test Null Range but the column has not null flag. { sql: "select * from t where t.c is null", - best: "IndexLookUp(Index(t.c_d_e)[], Table(t))", + best: "Dual", }, // Test TopN to index branch in double read. { @@ -1025,7 +1025,7 @@ func (s *testPlanSuite) TestRefine(c *C) { }, { sql: "select a from t where c in (1, 2, 3) and (d > 3 and d < 4 or d > 5 and d < 6)", - best: "IndexReader(Index(t.c_d_e)[])->Projection", + best: "Dual->Projection", }, { sql: "select a from t where c in (1, 2, 3) and (d > 2 and d < 4 or d > 5 and d < 7)", From 2aa9a99793deb51a3c3f22e8e259e666cb472a76 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Sun, 2 Sep 2018 12:58:35 +0800 Subject: [PATCH 11/15] adjust expected best plan of unit test, because emptry range scan is converted to TableDual --- plan/cbo_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plan/cbo_test.go b/plan/cbo_test.go index 5f90f40e36576..10aa64df1c826 100644 --- a/plan/cbo_test.go +++ b/plan/cbo_test.go @@ -554,7 +554,7 @@ func (s *testAnalyzeSuite) TestPreparedNullParam(c *C) { testKit.MustExec("insert into t values (1), (2), (3)") sql := "select * from t where id = ?" - best := "IndexReader(Index(t.id)[])" + best := "Dual" ctx := testKit.Se.(sessionctx.Context) stmts, err := session.Parse(ctx, sql) From 4e8f961209476d269f3330b4b525ed8131d9a212 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Mon, 3 Sep 2018 14:25:36 +0800 Subject: [PATCH 12/15] update explain count from 10000 to 0 --- cmd/explaintest/r/explain_easy.result | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 24ea69168bd67..5f9bcc55be58c 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -352,4 +352,4 @@ drop table if exists t; create table t(a bigint primary key); explain select * from t where a = 1 and a = 2; id count task operator info -TableDual_5 10000.00 root rows:0 +TableDual_5 0.00 root rows:0 From 79501564b2b3b933c9120237e59997f8a15d54c7 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Wed, 5 Sep 2018 20:40:30 +0800 Subject: [PATCH 13/15] address comments: add comments, change variable name, remove other paths --- plan/stats.go | 8 ++++---- util/ranger/ranger.go | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/plan/stats.go b/plan/stats.go index 32a19a084acc1..1a3ced2540840 100644 --- a/plan/stats.go +++ b/plan/stats.go @@ -147,8 +147,8 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { if err != nil { return nil, errors.Trace(err) } - // If there's only point range. Just remove other possible paths. - if noIntervalRanges { + // If we have point or empty range, just remove other possible paths. + if noIntervalRanges || len(path.ranges) == 0 { ds.possibleAccessPaths[0] = path ds.possibleAccessPaths = ds.possibleAccessPaths[:1] break @@ -159,8 +159,8 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { if err != nil { return nil, errors.Trace(err) } - // If there's only point range and this index is unique key. Just remove other possible paths. - if noIntervalRanges && path.index.Unique { + // If we have empty range, or point range on unique index, just remove other possible paths. + if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 { ds.possibleAccessPaths[0] = path ds.possibleAccessPaths = ds.possibleAccessPaths[:1] break diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 59f958595ce6d..99c5403e54268 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -468,32 +468,37 @@ func newFieldType(tp *types.FieldType) *types.FieldType { } } +// points2EqOrInCond constructs a 'EQUAL' or 'IN' scalar function based on the +// 'points'. The target column is extracted from the 'expr'. +// NOTE: +// 1. 'expr' must be either 'EQUAL' or 'IN' function. +// 2. 'points' should not be empty. func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression { //len(points) cannot be 0 here, since we impose early termination in extractEqAndInCondition sf, _ := expr.(*expression.ScalarFunction) //Constant and Column args should have same RetType, simply get from first arg retType := sf.GetArgs()[0].GetType() - values := make([]expression.Expression, 0, len(points)/2) + args := make([]expression.Expression, 0, len(points)/2) if sf.FuncName.L == ast.EQ { if c, ok := sf.GetArgs()[0].(*expression.Column); ok { - values = append(values, c) + args = append(args, c) } else if c, ok := sf.GetArgs()[1].(*expression.Column); ok { - values = append(values, c) + args = append(args, c) } } else { - values = append(values, sf.GetArgs()[0]) + args = append(args, sf.GetArgs()[0]) } for i := 0; i < len(points); i = i + 2 { value := &expression.Constant{ Value: points[i].value, RetType: retType, } - values = append(values, value) + args = append(args, value) } funcName := ast.EQ - if len(values) > 2 { + if len(args) > 2 { funcName = ast.In } - f := expression.NewFunctionInternal(ctx, funcName, sf.GetType(), values...) + f := expression.NewFunctionInternal(ctx, funcName, sf.GetType(), args...) return f } From 615517f84e8c7c0961fb142c5cbd08c38be1f7f2 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Wed, 5 Sep 2018 20:48:31 +0800 Subject: [PATCH 14/15] modify explain test, because we remove other paths --- cmd/explaintest/r/explain_easy.result | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 5141f4db853da..ce2bb3da47890 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -343,10 +343,10 @@ IndexReader_9 10.00 root index:IndexScan_8 └─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo explain select * from t where a = 1 and a = 2; id count task operator info -TableDual_8 0.00 root rows:0 +TableDual_5 0.00 root rows:0 explain select * from t where b = 1 and b = 2; id count task operator info -TableDual_8 0.00 root rows:0 +TableDual_5 0.00 root rows:0 drop table if exists t; create table t(a bigint primary key); explain select * from t where a = 1 and a = 2; From 72878272a008a547ab6dd660b52e3ac63b60007b Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 6 Sep 2018 19:57:01 +0800 Subject: [PATCH 15/15] adjust comments and add unit test --- util/ranger/detacher.go | 6 +++--- util/ranger/ranger.go | 4 ++-- util/ranger/ranger_test.go | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 7659308b3d644..5a31ff8328e95 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -208,14 +208,14 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex accesses[offset] = cond continue } - //multiple Eq/In conditions for one column in CNF, apply intersection on them - //lazily compute the points for the previously visited Eq/In + // Multiple Eq/In conditions for one column in CNF, apply intersection on them + // Lazily compute the points for the previously visited Eq/In if mergedAccesses[offset] == nil { mergedAccesses[offset] = accesses[offset] points[offset] = rb.build(accesses[offset]) } points[offset] = rb.intersection(points[offset], rb.build(cond)) - //early termination if false expression found + // Early termination if false expression found if len(points[offset]) == 0 { return nil, nil, nil, true } diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 99c5403e54268..cc9721a100046 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -474,9 +474,9 @@ func newFieldType(tp *types.FieldType) *types.FieldType { // 1. 'expr' must be either 'EQUAL' or 'IN' function. // 2. 'points' should not be empty. func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression { - //len(points) cannot be 0 here, since we impose early termination in extractEqAndInCondition + // len(points) cannot be 0 here, since we impose early termination in extractEqAndInCondition sf, _ := expr.(*expression.ScalarFunction) - //Constant and Column args should have same RetType, simply get from first arg + // Constant and Column args should have same RetType, simply get from first arg retType := sf.GetArgs()[0].GetType() args := make([]expression.Expression, 0, len(points)/2) if sf.FuncName.L == ast.EQ { diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 7618e866a1b4a..ff91a2666e661 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -446,6 +446,20 @@ func (s *testRangerSuite) TestIndexRange(c *C) { filterConds: "[]", resultStr: "[(NULL +inf,1 NULL) (1 +inf,2 NULL) (2 +inf,3 NULL) (3 +inf,+inf +inf]]", }, + { + indexPos: 1, + exprStr: "c in (1, 2) and c in (1, 3)", + accessConds: "[eq(test.t.c, 1)]", + filterConds: "[]", + resultStr: "[[1,1]]", + }, + { + indexPos: 1, + exprStr: "c = 1 and c = 2", + accessConds: "[]", + filterConds: "[]", + resultStr: "[]", + }, { indexPos: 0, exprStr: "a in (NULL)",