Skip to content

Commit

Permalink
Merge branch 'master' into drop-local-temp-table
Browse files Browse the repository at this point in the history
  • Loading branch information
lcwangchao authored Jul 20, 2021
2 parents dcffa61 + 0bf495d commit ccdd722
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 95 deletions.
6 changes: 3 additions & 3 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ func (p *LogicalJoin) constructInnerTableScanTask(
// TableScan as inner child of IndexJoin can return at most 1 tuple for each outer row.
RowCount: math.Min(1.0, countAfterAccess),
StatsVersion: ds.stats.StatsVersion,
// Cardinality would not be used in cost computation of IndexJoin, set leave it as default nil.
// NDV would not be used in cost computation of IndexJoin, so leave it as default nil.
}
rowSize := ds.TblColHists.GetTableAvgRowSize(p.ctx, ds.TblCols, ts.StoreType, true)
sessVars := ds.ctx.GetSessionVars()
Expand Down Expand Up @@ -1432,7 +1432,7 @@ func (ijHelper *indexJoinBuildHelper) updateBestChoice(ranges []*ranger.Range, p
}
var innerNDV float64
if stats := ijHelper.innerPlan.statsInfo(); stats != nil && stats.StatsVersion != statistics.PseudoVersion {
innerNDV = getCardinality(path.IdxCols[:usedColsLen], ijHelper.innerPlan.Schema(), stats)
innerNDV = getColsNDV(path.IdxCols[:usedColsLen], ijHelper.innerPlan.Schema(), stats)
}
// We choose the index by the NDV of the used columns, the larger the better.
// If NDVs are same, we choose index which uses more columns.
Expand Down Expand Up @@ -2141,7 +2141,7 @@ func (la *LogicalApply) exhaustPhysicalPlans(prop *property.PhysicalProperty) ([
}
cacheHitRatio := 0.0
if la.stats.RowCount != 0 {
ndv := getCardinality(columns, la.schema, la.stats)
ndv := getColsNDV(columns, la.schema, la.stats)
// for example, if there are 100 rows and the number of distinct values of these correlated columns
// is 70, then we can assume 30 rows can hit the cache so the cache hit ratio is 1 - (70/100) = 0.3
cacheHitRatio = 1 - (ndv / la.stats.RowCount)
Expand Down
4 changes: 2 additions & 2 deletions planner/core/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func optimizeByShuffle4Window(pp *PhysicalWindow, ctx sessionctx.Context) *Physi
for _, item := range pp.PartitionBy {
partitionBy = append(partitionBy, item.Col)
}
NDV := int(getCardinality(partitionBy, dataSource.Schema(), dataSource.statsInfo()))
NDV := int(getColsNDV(partitionBy, dataSource.Schema(), dataSource.statsInfo()))
if NDV <= 1 {
return nil
}
Expand Down Expand Up @@ -167,7 +167,7 @@ func optimizeByShuffle4StreamAgg(pp *PhysicalStreamAgg, ctx sessionctx.Context)
partitionBy = append(partitionBy, col)
}
}
NDV := int(getCardinality(partitionBy, dataSource.Schema(), dataSource.statsInfo()))
NDV := int(getColsNDV(partitionBy, dataSource.Schema(), dataSource.statsInfo()))
if NDV <= 1 {
return nil
}
Expand Down
154 changes: 77 additions & 77 deletions planner/core/stats.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion planner/core/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func (s *testStatsSuite) TestGroupNDVs(c *C) {
}
}

func (s *testStatsSuite) TestCardinalityGroupCols(c *C) {
func (s *testStatsSuite) TestNDVGroupCols(c *C) {
store, dom, err := newStoreWithBootstrap()
c.Assert(err, IsNil)
defer func() {
Expand Down
2 changes: 1 addition & 1 deletion planner/core/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -849,7 +849,7 @@ func (p *PhysicalMergeJoin) GetCost(lCnt, rCnt float64) float64 {
cpuCost += probeCost
// For merge join, only one group of rows with same join key(not null) are cached,
// we compute average memory cost using estimated group size.
NDV := getCardinality(innerKeys, innerSchema, innerStats)
NDV := getColsNDV(innerKeys, innerSchema, innerStats)
memoryCost := (innerStats.RowCount / NDV) * sessVars.MemoryFactor
return cpuCost + memoryCost
}
Expand Down
2 changes: 1 addition & 1 deletion planner/core/testdata/stats_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
]
},
{
"name": "TestCardinalityGroupCols",
"name": "TestNDVGroupCols",
"cases": [
// DataSource -> Aggregation.
"select count(1) from t1 group by a, b",
Expand Down
2 changes: 1 addition & 1 deletion planner/core/testdata/stats_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@
]
},
{
"Name": "TestCardinalityGroupCols",
"Name": "TestNDVGroupCols",
"Cases": [
{
"SQL": "select count(1) from t1 group by a, b",
Expand Down
18 changes: 9 additions & 9 deletions planner/property/stats_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
"github.com/pingcap/tidb/statistics"
)

// GroupNDV stores the cardinality of a group of columns.
// GroupNDV stores the NDV of a group of columns.
type GroupNDV struct {
// Cols are the UniqueIDs of columns.
Cols []int64
Expand All @@ -35,39 +35,39 @@ func ToString(ndvs []GroupNDV) string {
type StatsInfo struct {
RowCount float64

// Column.UniqueID -> Cardinality
Cardinality map[int64]float64
// Column.UniqueID -> NDV
ColNDVs map[int64]float64

HistColl *statistics.HistColl
// StatsVersion indicates the statistics version of a table.
// If the StatsInfo is calculated using the pseudo statistics on a table, StatsVersion will be PseudoVersion.
StatsVersion uint64

// GroupNDVs stores the cardinality of column groups.
// GroupNDVs stores the NDV of column groups.
GroupNDVs []GroupNDV
}

// String implements fmt.Stringer interface.
func (s *StatsInfo) String() string {
return fmt.Sprintf("count %v, Cardinality %v", s.RowCount, s.Cardinality)
return fmt.Sprintf("count %v, ColNDVs %v", s.RowCount, s.ColNDVs)
}

// Count reports the RowCount of the StatsInfo as an int64.
// RowCount is stored as a float64, so the fractional part is discarded
// by the conversion.
func (s *StatsInfo) Count() int64 {
	rows := s.RowCount
	return int64(rows)
}

// Scale receives a selectivity and multiplies it with RowCount and Cardinality.
// Scale receives a selectivity and multiplies it with RowCount and NDV.
func (s *StatsInfo) Scale(factor float64) *StatsInfo {
profile := &StatsInfo{
RowCount: s.RowCount * factor,
Cardinality: make(map[int64]float64, len(s.Cardinality)),
ColNDVs: make(map[int64]float64, len(s.ColNDVs)),
HistColl: s.HistColl,
StatsVersion: s.StatsVersion,
GroupNDVs: make([]GroupNDV, len(s.GroupNDVs)),
}
for id, c := range s.Cardinality {
profile.Cardinality[id] = c * factor
for id, c := range s.ColNDVs {
profile.ColNDVs[id] = c * factor
}
for i, g := range s.GroupNDVs {
profile.GroupNDVs[i] = g
Expand Down

0 comments on commit ccdd722

Please sign in to comment.