Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: correct behavior of non-lite InitStats and stats sync load of no stats column (#57803) #59590

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/statistics/handle/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ go_test(
embed = [":handle"],
flaky = True,
race = "on",
shard_count = 11,
shard_count = 12,
deps = [
"//pkg/config",
"//pkg/parser/model",
Expand Down
35 changes: 22 additions & 13 deletions pkg/statistics/handle/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,16 @@ func (h *Handle) initStatsMeta(is infoschema.InfoSchema) (util.StatsCache, error
return tables, nil
}

// initStatsHistogramsSQLGen builds the query text that reads rows from
// mysql.stats_histograms ordered by table_id.
//
// When isPaging is false the query covers the whole table. When isPaging is
// true a half-open table_id range filter is inserted; it carries two `%?`
// placeholders (lower bound inclusive, upper bound exclusive) that the caller
// must supply as arguments when executing the statement.
//
// The select list is the same in both modes, so the column ordinals of the
// result rows do not depend on isPaging.
func initStatsHistogramsSQLGen(isPaging bool) string {
	const (
		baseQuery   = "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl) */ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms"
		rangeFilter = " where table_id >= %? and table_id < %?"
		ordering    = " order by table_id"
	)

	// The filter stays empty for the full scan and is only filled in for
	// the paged variant.
	var filter string
	if isPaging {
		filter = rangeFilter
	}
	return baseQuery + filter + ordering
}

func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache util.StatsCache, iter *chunk.Iterator4Chunk) {
var table *statistics.Table
for row := iter.Begin(); row != iter.End(); row = iter.Next() {
Expand All @@ -137,9 +147,9 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache u
ndv := row.GetInt64(3)
version := row.GetUint64(4)
nullCount := row.GetInt64(5)
statsVer := row.GetInt64(7)
flag := row.GetInt64(9)
lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
statsVer := row.GetInt64(8)
flag := row.GetInt64(10)
lastAnalyzePos := row.GetDatum(11, types.NewFieldType(mysql.TypeBlob))
tbl, _ := h.TableInfoByID(is, table.PhysicalID)
if isIndex > 0 {
var idxInfo *model.IndexInfo
Expand Down Expand Up @@ -176,7 +186,7 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache u
if colInfo == nil {
continue
}
hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, row.GetInt64(6))
hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, row.GetInt64(7))
hist.Correlation = row.GetFloat64(8)
col := &statistics.Column{
Histogram: *hist,
Expand Down Expand Up @@ -288,7 +298,9 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache util.
StatsVer: statsVer,
}
// primary key column has no stats info, because primary key's is_index is false. so it cannot load the topn
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
if col.StatsAvailable() {
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
}
lastAnalyzePos.Copy(&col.LastAnalyzePos)
table.Columns[hist.ID] = col
}
Expand All @@ -299,7 +311,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache util.
}

func (h *Handle) initStatsHistogramsLite(is infoschema.InfoSchema, cache util.StatsCache) error {
sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id"
sql := initStatsHistogramsSQLGen(false)
rc, err := util.Exec(h.initStatsCtx, sql)
if err != nil {
return errors.Trace(err)
Expand All @@ -322,7 +334,7 @@ func (h *Handle) initStatsHistogramsLite(is infoschema.InfoSchema, cache util.St
}

func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, cache util.StatsCache) error {
sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id"
sql := initStatsHistogramsSQLGen(false)
rc, err := util.Exec(h.initStatsCtx, sql)
if err != nil {
return errors.Trace(err)
Expand Down Expand Up @@ -359,10 +371,7 @@ func (h *Handle) initStatsHistogramsByPaging(is infoschema.InfoSchema, cache uti
}()

sctx := se.(sessionctx.Context)
// Why do we need to add `is_index=1` in the SQL?
// because it is aligned to the `initStatsTopN` function, which only loads the topn of the index too.
// the other will be loaded by sync load.
sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms where table_id >= %? and table_id < %? and is_index=1"
sql := initStatsHistogramsSQLGen(true)
rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
if err != nil {
return errors.Trace(err)
Expand Down Expand Up @@ -676,7 +685,7 @@ func (h *Handle) initStatsBuckets(cache util.StatsCache, totalMemory uint64) err
return errors.Trace(err)
}
} else {
sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets order by table_id, is_index, hist_id, bucket_id"
sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index=1 order by table_id, is_index, hist_id, bucket_id"
rc, err := util.Exec(h.initStatsCtx, sql)
if err != nil {
return errors.Trace(err)
Expand Down Expand Up @@ -729,7 +738,7 @@ func (h *Handle) initStatsBucketsByPaging(cache util.StatsCache, task initstats.
}
}()
sctx := se.(sessionctx.Context)
sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id >= %? and table_id < %? order by table_id, is_index, hist_id, bucket_id"
sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index = 1 and table_id >= %? and table_id < %? order by table_id, is_index, hist_id, bucket_id"
rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
if err != nil {
return errors.Trace(err)
Expand Down
43 changes: 5 additions & 38 deletions pkg/statistics/handle/handle_hist.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
Expand Down Expand Up @@ -178,7 +177,7 @@ func (h *Handle) removeHistLoadedColumns(neededItems []model.TableItemID) []mode
continue
}
colHist, ok := tbl.Columns[item.ID]
if (ok && colHist.IsStatsInitialized() && !colHist.IsFullLoad()) || !ok {
if ok && colHist.IsStatsInitialized() && !colHist.IsFullLoad() {
remainedItems = append(remainedItems, item)
}
}
Expand Down Expand Up @@ -352,7 +351,7 @@ func (h *Handle) handleOneItemTask(task *NeededItemTask) (err error) {
var errGetHistMeta = errors.New("fail to get stats version for this histogram")

// readStatsForOneItem reads hist for one column/index, TODO load data via kv-get asynchronously
func (h *Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItemID, w *statsWrapper) (*statsWrapper, error) {
func (*Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItemID, w *statsWrapper) (*statsWrapper, error) {
failpoint.Inject("mockReadStatsForOnePanic", nil)
failpoint.Inject("mockReadStatsForOneFail", func(val failpoint.Value) {
if val.(bool) {
Expand All @@ -374,41 +373,9 @@ func (h *Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableIt
return nil, errors.Trace(err)
}
} else {
if c == nil {
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
tbl, ok := h.TableInfoByID(is, item.TableID)
if !ok {
return nil, errors.New("no table")
}
var colInfo *model.ColumnInfo
for _, col := range tbl.Meta().Columns {
if col.ID == item.ID {
colInfo = col
break
}
}
if colInfo == nil {
return nil, errors.New("no column")
}
hg, _, _, _, err = storage.HistMetaFromStorageWithHighPriority(sctx, &item, colInfo)
if err != nil {
return nil, err
}
if hg != nil {
hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &colInfo.FieldType, hg.NDV, int(isIndexFlag), hg.LastUpdateVersion, hg.NullCount, hg.TotColSize, hg.Correlation)
if err != nil {
return nil, errors.Trace(err)
}
}
c = &statistics.Column{
Info: colInfo,
IsHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
}
} else {
hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &c.Info.FieldType, c.Histogram.NDV, int(isIndexFlag), c.LastUpdateVersion, c.NullCount, c.TotColSize, c.Correlation)
if err != nil {
return nil, errors.Trace(err)
}
hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &c.Info.FieldType, c.Histogram.NDV, int(isIndexFlag), c.LastUpdateVersion, c.NullCount, c.TotColSize, c.Correlation)
if err != nil {
return nil, errors.Trace(err)
}
}
var cms *statistics.CMSketch
Expand Down
52 changes: 52 additions & 0 deletions pkg/statistics/handle/handle_hist_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,3 +403,55 @@ func TestSendLoadRequestsWaitTooLong(t *testing.T) {
require.Error(t, rs1.Err)
}
}

// TestSyncLoadOnObjectWhichCanNotFoundInStorage exercises the stats cache and
// the load triggered by a query when some columns of a table were never
// analyzed. It checks which columns appear in the cached table stats, and
// which of them report IsFullLoad, across three stages:
//  1. after analyzing only columns a and b,
//  2. after adding a new column d through DDL,
//  3. after additionally analyzing column c.
func TestSyncLoadOnObjectWhichCanNotFoundInStorage(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t(a int, b int, c int, primary key(a))")
h := dom.StatsHandle()
// Skip create table event.
<-h.DDLEventCh()
tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
// Only a and b are analyzed here; c is deliberately left out.
tk.MustExec("analyze table t columns a, b")
tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
// NOTE: the stats cache is re-initialized before err from TableByName is
// checked on the next line.
require.NoError(t, h.InitStatsLite(dom.InfoSchema()))
require.NoError(t, err)
require.NotNil(t, tbl)
tblInfo := tbl.Meta()
statsTbl, ok := h.Get(tblInfo.ID)
require.True(t, ok)
// Two column entries are expected after analyzing a and b only.
require.Equal(t, 2, len(statsTbl.Columns))
// Do some DDL, one successfully handled by handleDDLEvent, the other not.
// NOTE(review): only one DDL statement is issued below; the "the other not"
// part has no visible counterpart in this test — confirm the intent.
tk.MustExec("alter table t add column d int default 2")
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
require.NoError(t, h.Update(dom.InfoSchema()))
// Re-fetch the table info so that it includes the newly added column d.
tbl, err = dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
require.NotNil(t, tbl)
tblInfo = tbl.Meta()
statsTbl, ok = h.Get(tblInfo.ID)
require.True(t, ok)
// One more column entry is expected once the add-column event is handled.
require.Equal(t, 3, len(statsTbl.Columns))

// Try sync load.
// The predicate references all four columns.
tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4")
statsTbl, ok = h.Get(tblInfo.ID)
require.True(t, ok)
// Presumably tblInfo.Columns is in definition order (0=a, 1=b, 2=c, 3=d) —
// verify. Under that reading a, b and d must be fully loaded, while the
// never-analyzed c must have no entry in the stats table at all.
require.True(t, statsTbl.Columns[tblInfo.Columns[0].ID].IsFullLoad())
require.True(t, statsTbl.Columns[tblInfo.Columns[1].ID].IsFullLoad())
require.True(t, statsTbl.Columns[tblInfo.Columns[3].ID].IsFullLoad())
require.Nil(t, statsTbl.Columns[tblInfo.Columns[2].ID])

// Analyze c then test sync load again
tk.MustExec("analyze table t columns a, b, c")
require.NoError(t, h.InitStatsLite(dom.InfoSchema()))
tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4")
statsTbl, ok = h.Get(tblInfo.ID)
require.True(t, ok)
// a, b, d's status is not changed.
require.True(t, statsTbl.Columns[tblInfo.Columns[0].ID].IsFullLoad())
require.True(t, statsTbl.Columns[tblInfo.Columns[1].ID].IsFullLoad())
require.True(t, statsTbl.Columns[tblInfo.Columns[3].ID].IsFullLoad())
// c now has an entry and is expected to be fully loaded as well.
require.True(t, statsTbl.Columns[tblInfo.Columns[2].ID].IsFullLoad())
}
7 changes: 6 additions & 1 deletion pkg/statistics/handle/handletest/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1606,7 +1606,12 @@ func TestInitStatsLite(t *testing.T) {
statsTbl1 := h.GetTableStats(tblInfo)
checkAllEvicted(t, statsTbl1)
internal.AssertTableEqual(t, statsTbl0, statsTbl1)

for _, col := range statsTbl1.Columns {
require.Equal(t, int64(statistics.Version2), col.StatsVer)
}
for _, idx := range statsTbl1.Indices {
require.Equal(t, int64(statistics.Version2), idx.StatsVer)
}
// async stats load
tk.MustExec("set @@tidb_stats_load_sync_wait = 0")
tk.MustExec("explain select * from t where b > 1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func testConcurrentlyInitStats(t *testing.T) {
tk.MustQuery(fmt.Sprintf("explain select * from t%v where b = 1", i)).CheckNotContain("pseudo")
}
for i := 1; i < 10; i++ {
tk.MustQuery(fmt.Sprintf("explain select * from t%v where c = 1", i)).CheckNotContain("pseudo")
tk.MustQuery(fmt.Sprintf("explain select * from t%v where c >= 1", i)).CheckNotContain("pseudo")
}
for i := 1; i < 10; i++ {
tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr(fmt.Sprintf("t%v", i)))
Expand Down
9 changes: 9 additions & 0 deletions pkg/statistics/handle/handletest/statstest/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ func TestInitStats(t *testing.T) {
require.NoError(t, h.Update(is))
// Index and pk are loaded.
needed := fmt.Sprintf(`Table:%v RealtimeCount:6
column:1 ndv:6 totColSize:0
column:2 ndv:6 totColSize:6
column:3 ndv:6 totColSize:6
index:1 ndv:6
num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0
num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0
Expand Down Expand Up @@ -312,6 +315,12 @@ func TestInitStats2(t *testing.T) {
h.Clear()
require.NoError(t, h.Update(is))
table1 := h.GetTableStats(tbl.Meta())
// stats of pk will be loaded.
require.Equal(t, true, table0.Columns[1].IsAllEvicted())
require.Equal(t, true, table1.Columns[1].IsFullLoad())
delete(table0.Columns, 1)
delete(table1.Columns, 1)
// result part is not changed.
internal.AssertTableEqual(t, table0, table1)
h.SetLease(0)
}
Expand Down