diff --git a/pkg/statistics/handle/BUILD.bazel b/pkg/statistics/handle/BUILD.bazel index 39371579c03a2..c582bf3c4a9a0 100644 --- a/pkg/statistics/handle/BUILD.bazel +++ b/pkg/statistics/handle/BUILD.bazel @@ -59,7 +59,7 @@ go_test( embed = [":handle"], flaky = True, race = "on", - shard_count = 11, + shard_count = 12, deps = [ "//pkg/config", "//pkg/parser/model", diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index 5b8bad0b7c21d..7dce1ca6517ee 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -117,6 +117,16 @@ func (h *Handle) initStatsMeta(is infoschema.InfoSchema) (util.StatsCache, error return tables, nil } +// initStatsHistogramsSQLGen generates the SQL to load all stats_histograms records. +func initStatsHistogramsSQLGen(isPaging bool) string { + selectPrefix := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl) */ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms" + orderSuffix := " order by table_id" + if !isPaging { + return selectPrefix + orderSuffix + } + return selectPrefix + " where table_id >= %? and table_id < %?" 
+ orderSuffix +} + func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache util.StatsCache, iter *chunk.Iterator4Chunk) { var table *statistics.Table for row := iter.Begin(); row != iter.End(); row = iter.Next() { @@ -137,9 +147,9 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache u ndv := row.GetInt64(3) version := row.GetUint64(4) nullCount := row.GetInt64(5) - statsVer := row.GetInt64(7) - flag := row.GetInt64(9) - lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)) + statsVer := row.GetInt64(8) + flag := row.GetInt64(10) + lastAnalyzePos := row.GetDatum(11, types.NewFieldType(mysql.TypeBlob)) tbl, _ := h.TableInfoByID(is, table.PhysicalID) if isIndex > 0 { var idxInfo *model.IndexInfo @@ -176,7 +186,7 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache u if colInfo == nil { continue } - hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, row.GetInt64(6)) - hist.Correlation = row.GetFloat64(8) + hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, row.GetInt64(7)) + hist.Correlation = row.GetFloat64(9) col := &statistics.Column{ Histogram: *hist, @@ -288,7 +298,9 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache util. StatsVer: statsVer, } // primary key column has no stats info, because primary key's is_index is false. so it cannot load the topn - col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus() + if col.StatsAvailable() { + col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus() + } lastAnalyzePos.Copy(&col.LastAnalyzePos) table.Columns[hist.ID] = col } @@ -299,7 +311,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache util. 
} func (h *Handle) initStatsHistogramsLite(is infoschema.InfoSchema, cache util.StatsCache) error { - sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id" + sql := initStatsHistogramsSQLGen(false) rc, err := util.Exec(h.initStatsCtx, sql) if err != nil { return errors.Trace(err) @@ -322,7 +334,7 @@ func (h *Handle) initStatsHistogramsLite(is infoschema.InfoSchema, cache util.St } func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, cache util.StatsCache) error { - sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id" + sql := initStatsHistogramsSQLGen(false) rc, err := util.Exec(h.initStatsCtx, sql) if err != nil { return errors.Trace(err) @@ -359,10 +371,7 @@ func (h *Handle) initStatsHistogramsByPaging(is infoschema.InfoSchema, cache uti }() sctx := se.(sessionctx.Context) - // Why do we need to add `is_index=1` in the SQL? - // because it is aligned to the `initStatsTopN` function, which only loads the topn of the index too. - // the other will be loaded by sync load. - sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms where table_id >= %? and table_id < %? 
and is_index=1" + sql := initStatsHistogramsSQLGen(true) rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid) if err != nil { return errors.Trace(err) @@ -676,7 +685,7 @@ func (h *Handle) initStatsBuckets(cache util.StatsCache, totalMemory uint64) err return errors.Trace(err) } } else { - sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets order by table_id, is_index, hist_id, bucket_id" + sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index=1 order by table_id, is_index, hist_id, bucket_id" rc, err := util.Exec(h.initStatsCtx, sql) if err != nil { return errors.Trace(err) @@ -729,7 +738,7 @@ func (h *Handle) initStatsBucketsByPaging(cache util.StatsCache, task initstats. } }() sctx := se.(sessionctx.Context) - sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id >= %? and table_id < %? order by table_id, is_index, hist_id, bucket_id" + sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index = 1 and table_id >= %? and table_id < %? 
order by table_id, is_index, hist_id, bucket_id" rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid) if err != nil { return errors.Trace(err) diff --git a/pkg/statistics/handle/handle_hist.go b/pkg/statistics/handle/handle_hist.go index 63ca37f8337d4..da41535440d4e 100644 --- a/pkg/statistics/handle/handle_hist.go +++ b/pkg/statistics/handle/handle_hist.go @@ -23,7 +23,6 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/config" - "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/metrics" "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" @@ -178,7 +177,7 @@ func (h *Handle) removeHistLoadedColumns(neededItems []model.TableItemID) []mode continue } colHist, ok := tbl.Columns[item.ID] - if (ok && colHist.IsStatsInitialized() && !colHist.IsFullLoad()) || !ok { + if ok && colHist.IsStatsInitialized() && !colHist.IsFullLoad() { remainedItems = append(remainedItems, item) } } @@ -352,7 +351,7 @@ func (h *Handle) handleOneItemTask(task *NeededItemTask) (err error) { var errGetHistMeta = errors.New("fail to get stats version for this histogram") // readStatsForOneItem reads hist for one column/index, TODO load data via kv-get asynchronously -func (h *Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItemID, w *statsWrapper) (*statsWrapper, error) { +func (*Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItemID, w *statsWrapper) (*statsWrapper, error) { failpoint.Inject("mockReadStatsForOnePanic", nil) failpoint.Inject("mockReadStatsForOneFail", func(val failpoint.Value) { if val.(bool) { @@ -374,41 +373,9 @@ func (h *Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableIt return nil, errors.Trace(err) } } else { - if c == nil { - is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) - tbl, ok := h.TableInfoByID(is, item.TableID) - if !ok { - return nil, errors.New("no table") - } - var colInfo 
*model.ColumnInfo - for _, col := range tbl.Meta().Columns { - if col.ID == item.ID { - colInfo = col - break - } - } - if colInfo == nil { - return nil, errors.New("no column") - } - hg, _, _, _, err = storage.HistMetaFromStorageWithHighPriority(sctx, &item, colInfo) - if err != nil { - return nil, err - } - if hg != nil { - hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &colInfo.FieldType, hg.NDV, int(isIndexFlag), hg.LastUpdateVersion, hg.NullCount, hg.TotColSize, hg.Correlation) - if err != nil { - return nil, errors.Trace(err) - } - } - c = &statistics.Column{ - Info: colInfo, - IsHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), - } - } else { - hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &c.Info.FieldType, c.Histogram.NDV, int(isIndexFlag), c.LastUpdateVersion, c.NullCount, c.TotColSize, c.Correlation) - if err != nil { - return nil, errors.Trace(err) - } + hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &c.Info.FieldType, c.Histogram.NDV, int(isIndexFlag), c.LastUpdateVersion, c.NullCount, c.TotColSize, c.Correlation) + if err != nil { + return nil, errors.Trace(err) } } var cms *statistics.CMSketch diff --git a/pkg/statistics/handle/handle_hist_test.go b/pkg/statistics/handle/handle_hist_test.go index eb973fdb88970..3f7447b0ddcef 100644 --- a/pkg/statistics/handle/handle_hist_test.go +++ b/pkg/statistics/handle/handle_hist_test.go @@ -403,3 +403,55 @@ func TestSendLoadRequestsWaitTooLong(t *testing.T) { require.Error(t, rs1.Err) } } + +func TestSyncLoadOnObjectWhichCanNotFoundInStorage(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c int, primary key(a))") + h := dom.StatsHandle() + // Skip create table event. 
+ <-h.DDLEventCh() + tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)") + tk.MustExec("analyze table t columns a, b") + tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, h.InitStatsLite(dom.InfoSchema())) + require.NoError(t, err) + require.NotNil(t, tbl) + tblInfo := tbl.Meta() + statsTbl, ok := h.Get(tblInfo.ID) + require.True(t, ok) + require.Equal(t, 2, len(statsTbl.Columns)) + // Do some DDL, one successfully handled by handleDDLEvent, the other not. + tk.MustExec("alter table t add column d int default 2") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + require.NoError(t, h.Update(dom.InfoSchema())) + tbl, err = dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + require.NotNil(t, tbl) + tblInfo = tbl.Meta() + statsTbl, ok = h.Get(tblInfo.ID) + require.True(t, ok) + require.Equal(t, 3, len(statsTbl.Columns)) + + // Try sync load. + tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4") + statsTbl, ok = h.Get(tblInfo.ID) + require.True(t, ok) + require.True(t, statsTbl.Columns[tblInfo.Columns[0].ID].IsFullLoad()) + require.True(t, statsTbl.Columns[tblInfo.Columns[1].ID].IsFullLoad()) + require.True(t, statsTbl.Columns[tblInfo.Columns[3].ID].IsFullLoad()) + require.Nil(t, statsTbl.Columns[tblInfo.Columns[2].ID]) + + // Analyze c then test sync load again + tk.MustExec("analyze table t columns a, b, c") + require.NoError(t, h.InitStatsLite(dom.InfoSchema())) + tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4") + statsTbl, ok = h.Get(tblInfo.ID) + require.True(t, ok) + // a, b, d's status is not changed. 
+ require.True(t, statsTbl.Columns[tblInfo.Columns[0].ID].IsFullLoad()) + require.True(t, statsTbl.Columns[tblInfo.Columns[1].ID].IsFullLoad()) + require.True(t, statsTbl.Columns[tblInfo.Columns[3].ID].IsFullLoad()) + require.True(t, statsTbl.Columns[tblInfo.Columns[2].ID].IsFullLoad()) +} diff --git a/pkg/statistics/handle/handletest/handle_test.go b/pkg/statistics/handle/handletest/handle_test.go index 50c3d3f630bb4..d0f16a5e19f5e 100644 --- a/pkg/statistics/handle/handletest/handle_test.go +++ b/pkg/statistics/handle/handletest/handle_test.go @@ -1606,7 +1606,12 @@ func TestInitStatsLite(t *testing.T) { statsTbl1 := h.GetTableStats(tblInfo) checkAllEvicted(t, statsTbl1) internal.AssertTableEqual(t, statsTbl0, statsTbl1) - + for _, col := range statsTbl1.Columns { + require.Equal(t, int64(statistics.Version2), col.StatsVer) + } + for _, idx := range statsTbl1.Indices { + require.Equal(t, int64(statistics.Version2), idx.StatsVer) + } // async stats load tk.MustExec("set @@tidb_stats_load_sync_wait = 0") tk.MustExec("explain select * from t where b > 1") diff --git a/pkg/statistics/handle/handletest/initstats/load_stats_test.go b/pkg/statistics/handle/handletest/initstats/load_stats_test.go index 8cec9fcaa4598..a2c3af1ed6174 100644 --- a/pkg/statistics/handle/handletest/initstats/load_stats_test.go +++ b/pkg/statistics/handle/handletest/initstats/load_stats_test.go @@ -86,7 +86,7 @@ func testConcurrentlyInitStats(t *testing.T) { tk.MustQuery(fmt.Sprintf("explain select * from t%v where b = 1", i)).CheckNotContain("pseudo") } for i := 1; i < 10; i++ { - tk.MustQuery(fmt.Sprintf("explain select * from t%v where c = 1", i)).CheckNotContain("pseudo") + tk.MustQuery(fmt.Sprintf("explain select * from t%v where c >= 1", i)).CheckNotContain("pseudo") } for i := 1; i < 10; i++ { tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr(fmt.Sprintf("t%v", i))) diff --git a/pkg/statistics/handle/handletest/statstest/stats_test.go 
b/pkg/statistics/handle/handletest/statstest/stats_test.go index ecdd456e88637..83475acfe4d43 100644 --- a/pkg/statistics/handle/handletest/statstest/stats_test.go +++ b/pkg/statistics/handle/handletest/statstest/stats_test.go @@ -270,6 +270,9 @@ func TestInitStats(t *testing.T) { require.NoError(t, h.Update(is)) // Index and pk are loaded. needed := fmt.Sprintf(`Table:%v RealtimeCount:6 +column:1 ndv:6 totColSize:0 +column:2 ndv:6 totColSize:6 +column:3 ndv:6 totColSize:6 index:1 ndv:6 num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0 num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0 @@ -312,6 +315,12 @@ func TestInitStats2(t *testing.T) { h.Clear() require.NoError(t, h.Update(is)) table1 := h.GetTableStats(tbl.Meta()) + // stats of pk will be loaded. + require.Equal(t, true, table0.Columns[1].IsAllEvicted()) + require.Equal(t, true, table1.Columns[1].IsFullLoad()) + delete(table0.Columns, 1) + delete(table1.Columns, 1) + // result part is not changed. internal.AssertTableEqual(t, table0, table1) h.SetLease(0) }