pingcap · zz-jason · Apr 28, 2018 · Apr 23, 2018 · Apr 23, 2018 · Apr 24, 2018
diff --git a/plan/build_key_info.go b/plan/build_key_info.go
@@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
 func (ds *DataSource) buildKeyInfo() {
 	ds.schema.Keys = nil
 	ds.baseLogicalPlan.buildKeyInfo()
-	indices := ds.availableIndices.indices
-	for _, idx := range indices {
+	for _, path := range ds.possibleAccessPaths {
+		if path.isRowID {
+			continue
+		}
+		idx := path.index
 		if !idx.Unique {
 			continue
 		}

diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go
@@ -256,9 +256,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
 	if !ok {
 		return nil
 	}
-	indices := x.availableIndices.indices
-	includeTableScan := x.availableIndices.includeTableScan
-	if includeTableScan && len(innerJoinKeys) == 1 {
+	indexPaths := x.possibleAccessPaths
+	if len(x.possibleAccessPaths) > 0 && x.possibleAccessPaths[0].isRowID {
+		indexPaths = indexPaths[1:]
 		pkCol := x.getPKIsHandleCol()
 		if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
 			innerPlan := x.forceToTableScan(pkCol)
@@ -272,7 +272,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
 		remainedOfBest []expression.Expression
 		keyOff2IdxOff  []int
 	)
-	for _, indexInfo := range indices {
+	for _, path := range indexPaths {
+		indexInfo := path.index
 		ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
 		// We choose the index by the number of used columns of the range, the much the better.
 		// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.

diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go
@@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
 	tableInfo := tbl.Meta()
 	b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")
 
-	availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
+	possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo)
 	if err != nil {
 		b.err = errors.Trace(err)
 		return nil
@@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
 	}
 
 	ds := DataSource{
-		DBName:           dbName,
-		tableInfo:        tableInfo,
-		statisticTable:   b.getStatsTable(tableInfo),
-		indexHints:       tn.IndexHints,
-		availableIndices: availableIdxes,
-		Columns:          make([]*model.ColumnInfo, 0, len(columns)),
+		DBName:              dbName,
+		tableInfo:           tableInfo,
+		statisticTable:      b.getStatsTable(tableInfo),
+		indexHints:          tn.IndexHints,
+		possibleAccessPaths: possiblePaths,
+		Columns:             make([]*model.ColumnInfo, 0, len(columns)),
 	}.init(b.ctx)
 
 	var handleCol *expression.Column

diff --git a/plan/logical_plans.go b/plan/logical_plans.go
@@ -14,13 +14,18 @@
 package plan
 
 import (
+	"math"
+
+	"github.com/juju/errors"
 	"github.com/pingcap/tidb/ast"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/expression/aggregation"
 	"github.com/pingcap/tidb/model"
 	"github.com/pingcap/tidb/mysql"
 	"github.com/pingcap/tidb/statistics"
 	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/ranger"
+	log "github.com/sirupsen/logrus"
 )
 
 var (
@@ -299,13 +304,88 @@ type DataSource struct {
 
 	statisticTable *statistics.Table
 
-	// availableIndices is used for storing result of availableIndices function.
-	availableIndices *availableIndices
+	// possibleAccessPaths stores all the possible access paths for the physical plan, including the table scan.
+	// Please make sure table scan is always the first element.
+	possibleAccessPaths []*accessPath
+}
+
+// accessPath describes one way to read a table: through a single index, or by scanning the table itself.
+type accessPath struct {
+	index  *model.IndexInfo   // index read by this path; not consulted when isRowID is true (presumably nil then — confirm)
+	ranges []*ranger.NewRange // ranges to scan, filled in by prepareTablePath / prepareIndexPath
+	// countAfterAccess is the row count after we apply the range seek and before we use any other filter to filter data.
+	countAfterAccess float64
+	// countAfterIndex is the row count after we apply the filters on the index and before we apply the table filters.
+	countAfterIndex float64
+	accessConds     []expression.Expression // conditions the ranges are built from
+	eqCondCount     int                     // set from ranger.DetachCondAndBuildRangeForIndex; presumably the number of equality access conditions — confirm
+	indexFilters    []expression.Expression // index-side result of splitIndexFilterConditions; feeds the countAfterIndex selectivity
+	tableFilters    []expression.Expression // conditions left for the table side once access conditions and index filters are split off
+	// isRowID indicates this path stores the information for a table scan.
+	isRowID bool
+	// forced means this path comes from a `use/force index()` hint.
+	forced bool
 }
 
-type availableIndices struct {
-	indices          []*model.IndexInfo
-	includeTableScan bool
+// prepareTablePath fills in the ranges, filters and row count of the table scan path; it returns any range-building or statistics error.
+func (ds *DataSource) prepareTablePath(path *accessPath) error {
+	var err error
+	sc := ds.ctx.GetSessionVars().StmtCtx
+	pkCol := ds.getPKIsHandleCol() // may be nil; the pkCol-specific branches below are skipped then
+	if pkCol != nil {
+		path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
+	} else {
+		path.ranges = ranger.FullIntNewRange(false)
+	}
+	path.countAfterAccess = float64(ds.statisticTable.Count)
+	if len(ds.pushedDownConds) > 0 {
+		if pkCol != nil {
+			path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
+			path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges)
+			if err != nil {
+				return errors.Trace(err)
+			}
+		} else {
+			path.tableFilters = ds.pushedDownConds
+		}
+	}
+	return nil
+}
+
+// prepareIndexPath fills in the ranges, filters and row counts of an index access path; it returns any range-building or statistics error.
+func (ds *DataSource) prepareIndexPath(path *accessPath) error {
+	var err error
+	sc := ds.ctx.GetSessionVars().StmtCtx
+	path.ranges = ranger.FullNewRange()
+	path.countAfterAccess = float64(ds.statisticTable.Count)
+	idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index)
+	if len(idxCols) != 0 {
+		path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo)
+	} else {
+		path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo)
+	}
+	path.countAfterIndex = path.countAfterAccess // stays as-is when there are no index filters
+	if path.indexFilters != nil {
+		selectivity, selErr := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
+		if selErr != nil {
+			log.Warnf("An error happened: %v, we have to use the default selectivity", selErr.Error())
+			selectivity = selectionFactor
+		}
+		path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count)
+	}
+	return nil
 }
 
 func (ds *DataSource) getPKIsHandleCol() *expression.Column {

diff --git a/plan/optimizer.go b/plan/optimizer.go
@@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {
 
 func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
 	logic.preparePossibleProperties()
-	logic.deriveStats()
+	_, err := logic.deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
 	if err != nil {
 		return nil, errors.Trace(err)