Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: calc access path when doing deriveStats. #6346

Merged
merged 11 commits into from
Apr 28, 2018
7 changes: 5 additions & 2 deletions plan/build_key_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
func (ds *DataSource) buildKeyInfo() {
ds.schema.Keys = nil
ds.baseLogicalPlan.buildKeyInfo()
indices := ds.availableIndices.indices
for _, idx := range indices {
for _, path := range ds.possibleAccessPaths {
if path.isRowID {
continue
}
idx := path.index
if !idx.Unique {
continue
}
Expand Down
9 changes: 5 additions & 4 deletions plan/gen_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
if !ok {
return nil
}
indices := x.availableIndices.indices
includeTableScan := x.availableIndices.includeTableScan
if includeTableScan && len(innerJoinKeys) == 1 {
indexPaths := x.possibleAccessPaths
if len(x.possibleAccessPaths) > 0 && x.possibleAccessPaths[0].isRowID {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use `len(indexPaths) > 0 && indexPaths[0].isRowID` here, since indexPaths was just assigned from x.possibleAccessPaths?

indexPaths = indexPaths[1:]
pkCol := x.getPKIsHandleCol()
if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
innerPlan := x.forceToTableScan(pkCol)
Expand All @@ -272,7 +272,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
remainedOfBest []expression.Expression
keyOff2IdxOff []int
)
for _, indexInfo := range indices {
for _, path := range indexPaths {
indexInfo := path.index
ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
// We choose the index by the number of used columns of the range, the more the better.
// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.
Expand Down
14 changes: 7 additions & 7 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
tableInfo := tbl.Meta()
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")

availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo)
if err != nil {
b.err = errors.Trace(err)
return nil
Expand All @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
}

ds := DataSource{
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
availableIndices: availableIdxes,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
possibleAccessPaths: possiblePaths,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
}.init(b.ctx)

var handleCol *expression.Column
Expand Down
90 changes: 85 additions & 5 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,18 @@
package plan

import (
"math"

"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
log "github.com/sirupsen/logrus"
)

var (
Expand Down Expand Up @@ -299,13 +304,88 @@ type DataSource struct {

statisticTable *statistics.Table

// availableIndices is used for storing result of availableIndices function.
availableIndices *availableIndices
// possibleAccessPaths stores all the possible access paths for the physical plan, including table scan.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all the possible access paths

// Please make sure table scan is always the first element.
possibleAccessPaths []*accessPath
}

// accessPath tells how we access one index or just access table.
type accessPath struct {
index *model.IndexInfo
ranges []*ranger.NewRange
// countAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
countAfterAccess float64
// countAfterIndex is the row count after we apply filters on index and before we apply the table filters.
countAfterIndex float64
accessConds []expression.Expression
eqCondCount int
indexFilters []expression.Expression
tableFilters []expression.Expression
// isRowID indicates this path stores the information for table scan.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/ this path stores the information for table scan/ whether this path is a table path

isRowID bool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not just name it as isTablePath?

// forced means this path is generated by `use/force index()`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/ this index/ this path

forced bool
}

type availableIndices struct {
indices []*model.IndexInfo
includeTableScan bool
func (ds *DataSource) prepareTablePath(path *accessPath) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it's called by deriveStats, how about renaming it to deriveTablePathStats?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this be more concise?

// prepareTablePath fills in the table-scan access path: its ranges, its
// access conditions, its remaining table filters, and the estimated row
// count after the range is applied (countAfterAccess).
//
// It returns a traced error when building the table range or estimating the
// row count fails; otherwise it returns nil.
func (ds *DataSource) prepareTablePath(path *accessPath) error {
	sc := ds.ctx.GetSessionVars().StmtCtx

	// Defaults for a scan that cannot be narrowed: every row is read, the
	// range is the full signed integer range, and all pushed-down conditions
	// stay as table filters.
	path.countAfterAccess = float64(ds.statisticTable.Count)
	path.ranges = ranger.FullIntNewRange(false)
	path.tableFilters = ds.pushedDownConds

	// pkCol was previously declared but never assigned, so it was always nil
	// and everything below this check was unreachable. Look it up instead.
	// NOTE(review): presumably getPKIsHandleCol returns the integer primary
	// key column that serves as the row handle, or nil if there is none -
	// confirm against its definition.
	pkCol := ds.getPKIsHandleCol()
	if pkCol == nil {
		return nil
	}

	// The full range depends on whether the column is unsigned.
	path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
	if len(ds.pushedDownConds) == 0 {
		return nil
	}

	// Split the pushed-down conditions into those usable for the table range
	// and those that must still be evaluated as filters.
	var err error
	path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
	path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType)
	if err != nil {
		return errors.Trace(err)
	}
	path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges)
	return errors.Trace(err)
}

var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.countAfterAccess = float64(ds.statisticTable.Count)
var pkCol *expression.Column
if pkCol != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when will pkCol != nil?

path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
} else {
path.ranges = ranger.FullIntNewRange(false)
}
path.countAfterAccess = float64(ds.statisticTable.Count)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated with line 333?

if len(ds.pushedDownConds) > 0 {
if pkCol != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may put sc as the first parameter.

if err != nil {
return errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges)
if err != nil {
return errors.Trace(err)
}
} else {
path.tableFilters = ds.pushedDownConds
}
}
return nil
}

func (ds *DataSource) prepareIndexPath(path *accessPath) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullNewRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
path.countAfterIndex = float64(ds.statisticTable.Count)
idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index)
if len(idxCols) != 0 {
path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths)
if err != nil {
return errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
if err != nil {
return errors.Trace(err)
}
path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo)
} else {
path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo)
}
path.countAfterIndex = path.countAfterAccess
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated with line 364?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed one.

if path.indexFilters != nil {
selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
}
path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count)
}
return nil
}

func (ds *DataSource) getPKIsHandleCol() *expression.Column {
Expand Down
5 changes: 4 additions & 1 deletion plan/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {

func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
logic.preparePossibleProperties()
logic.deriveStats()
_, err := logic.deriveStats()
if err != nil {
return nil, errors.Trace(err)
}
t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
if err != nil {
return nil, errors.Trace(err)
Expand Down
Loading