From 2540df1af0102d7a60f6d80cde10f740cf2c24d6 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Tue, 16 Jul 2019 16:32:23 +0800 Subject: [PATCH 1/4] executor, planner: support more analyze options --- executor/analyze.go | 39 ++++++++++++------------ executor/analyze_test.go | 21 ++++++++++--- executor/builder.go | 48 ++++++++++++++--------------- executor/executor_test.go | 1 + go.mod | 2 ++ go.sum | 6 ++-- planner/core/common_plans.go | 6 ++-- planner/core/planbuilder.go | 58 ++++++++++++++++++++++++++++-------- 8 files changed, 115 insertions(+), 66 deletions(-) diff --git a/executor/analyze.go b/executor/analyze.go index c159b82313206..05ae1c5ffc3e9 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/debugpb" + "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/distsql" @@ -70,10 +71,8 @@ var ( ) const ( - maxRegionSampleSize = 1000 - maxSketchSize = 10000 - defaultCMSketchDepth = 5 - defaultCMSketchWidth = 2048 + maxRegionSampleSize = 1000 + maxSketchSize = 10000 ) // Next implements the Executor Next interface. @@ -252,7 +251,7 @@ type AnalyzeIndexExec struct { analyzePB *tipb.AnalyzeReq result distsql.SelectResult countNullRes distsql.SelectResult - maxNumBuckets uint64 + opts map[ast.AnalyzeOptionType]uint64 job *statistics.AnalyzeJob } @@ -307,7 +306,7 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee hist := &statistics.Histogram{} var cms *statistics.CMSketch if needCMS { - cms = statistics.NewCMSketch(defaultCMSketchDepth, defaultCMSketchWidth) + cms = statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth])) } for { data, err := result.NextRaw(context.TODO()) @@ -324,7 +323,7 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee } respHist := statistics.HistogramFromProto(resp.Hist) e.job.Update(int64(respHist.TotalRowCount())) - hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.maxNumBuckets)) + hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets])) if err != nil { return nil, nil, err } @@ -401,7 +400,7 @@ type AnalyzeColumnsExec struct { priority int analyzePB *tipb.AnalyzeReq resultHandler *tableResultHandler - maxNumBuckets uint64 + opts map[ast.AnalyzeOptionType]uint64 job *statistics.AnalyzeJob } @@ -465,7 +464,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis IsMerger: true, FMSketch: statistics.NewFMSketch(maxSketchSize), MaxSampleSize: int64(MaxSampleSize), - CMSketch: statistics.NewCMSketch(defaultCMSketchDepth, defaultCMSketchWidth), + CMSketch: statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth])), } } for { @@ -486,7 +485,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis if e.pkInfo != nil { respHist := statistics.HistogramFromProto(resp.PkHist) rowCount = int64(respHist.TotalRowCount()) - pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.maxNumBuckets)) + pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets])) if err != nil { return nil, nil, err } @@ -516,7 +515,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis return nil, nil, err } } - hg, err := statistics.BuildColumn(e.ctx, int64(e.maxNumBuckets), col.ID, collectors[i], &col.FieldType) + hg, err := statistics.BuildColumn(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), col.ID, collectors[i], &col.FieldType) if err != nil { return nil, nil, err } @@ -591,7 +590,7 @@ type AnalyzeFastExec struct { colsInfo []*model.ColumnInfo idxsInfo []*model.IndexInfo concurrency int - maxNumBuckets uint64 + opts map[ast.AnalyzeOptionType]uint64 tblInfo *model.TableInfo cache *tikv.RegionCache wg *sync.WaitGroup @@ -1006,9 +1005,9 @@ func (e *AnalyzeFastExec) buildColumnStats(ID int64, collector *statistics.Sampl data = append(data, bytes) } // Build CMSketch. - cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(defaultCMSketchDepth, defaultCMSketchWidth, data, 20, uint64(rowCount)) + cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data, uint32(e.opts[ast.AnalyzeOptNumTopN]), uint64(rowCount)) // Build Histogram. - hist, err := statistics.BuildColumnHist(e.ctx, int64(e.maxNumBuckets), ID, collector, tp, rowCount, int64(ndv), collector.NullCount*int64(scaleRatio)) + hist, err := statistics.BuildColumnHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), ID, collector, tp, rowCount, int64(ndv), collector.NullCount*int64(scaleRatio)) return hist, cmSketch, err } @@ -1029,20 +1028,20 @@ func (e *AnalyzeFastExec) buildIndexStats(idxInfo *model.IndexInfo, collector *s data[i] = append(data[i], sample.Value.GetBytes()[:preLen]) } } - numTop := uint32(20) - cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(defaultCMSketchDepth, defaultCMSketchWidth, data[0], numTop, uint64(rowCount)) + numTop := uint32(e.opts[ast.AnalyzeOptNumTopN]) + cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[0], numTop, uint64(rowCount)) // Build CM Sketch for each prefix and merge them into one. for i := 1; i < len(idxInfo.Columns); i++ { var curCMSketch *statistics.CMSketch // `ndv` should be the ndv of full index, so just rewrite it here. - curCMSketch, ndv, scaleRatio = statistics.NewCMSketchWithTopN(defaultCMSketchDepth, defaultCMSketchWidth, data[i], numTop, uint64(rowCount)) + curCMSketch, ndv, scaleRatio = statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[i], numTop, uint64(rowCount)) err := cmSketch.MergeCMSketch(curCMSketch, numTop) if err != nil { return nil, nil, err } } // Build Histogram. - hist, err := statistics.BuildColumnHist(e.ctx, int64(e.maxNumBuckets), idxInfo.ID, collector, types.NewFieldType(mysql.TypeBlob), rowCount, int64(ndv), collector.NullCount*int64(scaleRatio)) + hist, err := statistics.BuildColumnHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), idxInfo.ID, collector, types.NewFieldType(mysql.TypeBlob), rowCount, int64(ndv), collector.NullCount*int64(scaleRatio)) return hist, cmSketch, err } @@ -1209,7 +1208,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult if err != nil { return analyzeResult{Err: err, job: idxExec.job} } - hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.maxNumBuckets)) + hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.opts[ast.AnalyzeOptNumBuckets])) if err != nil { return analyzeResult{Err: err, job: idxExec.job} } @@ -1252,7 +1251,7 @@ func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult { return analyzeResult{Err: err, job: colExec.job} } hist := hists[0] - hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.maxNumBuckets)) + hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.opts[ast.AnalyzeOptNumBuckets])) if err != nil { return analyzeResult{Err: err, job: colExec.job} } diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 50577ca7c50ba..7b6ef0c511407 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -123,18 +123,31 @@ func (s *testSuite1) TestAnalyzeParameters(c *C) { for i := 0; i < 20; i++ { tk.MustExec(fmt.Sprintf("insert into t values (%d)", i)) } + tk.MustExec(fmt.Sprintf("insert into t values (19), (19), (19)")) + tk.MustExec("set @@tidb_enable_fast_analyze = 1") + executor.MaxSampleSize = 30 tk.MustExec("analyze table t") is := executor.GetInfoSchema(tk.Se.(sessionctx.Context)) table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) c.Assert(err, IsNil) tableInfo := table.Meta() tbl := s.dom.StatsHandle().GetTableStats(tableInfo) - c.Assert(tbl.Columns[1].Len(), Equals, 20) - - tk.MustExec("analyze table t with 4 buckets") + col := tbl.Columns[1] + c.Assert(col.Len(), Equals, 20) + c.Assert(len(col.CMSketch.TopN()), Equals, 20) + width, depth := col.CMSketch.GetWidthAndDepth() + c.Assert(depth, Equals, int32(5)) + c.Assert(width, Equals, int32(2048)) + + tk.MustExec("analyze table t with 4 buckets, 1 topn, 4 cmsketch width, 4 cmsketch depth") tbl = s.dom.StatsHandle().GetTableStats(tableInfo) - c.Assert(tbl.Columns[1].Len(), Equals, 4) + col = tbl.Columns[1] + c.Assert(col.Len(), Equals, 4) + c.Assert(len(col.CMSketch.TopN()), Equals, 1) + width, depth = col.CMSketch.GetWidthAndDepth() + c.Assert(depth, Equals, int32(4)) + c.Assert(width, Equals, int32(4)) } func (s *testSuite1) TestAnalyzeTooLongColumns(c *C) { diff --git a/executor/builder.go b/executor/builder.go index 20c4f9525e92a..4c9630be31db2 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -1398,7 +1398,7 @@ func (b *executorBuilder) buildDelete(v *plannercore.Delete) Executor { return deleteExec } -func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeIndexTask, maxNumBuckets uint64, autoAnalyze string) *analyzeTask { +func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string) *analyzeTask { _, offset := timeutil.Zone(b.ctx.GetSessionVars().Location()) sc := b.ctx.GetSessionVars().StmtCtx e := &AnalyzeIndexExec{ @@ -1412,24 +1412,24 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeInde Flags: sc.PushDownFlags(), TimeZoneOffset: offset, }, - maxNumBuckets: maxNumBuckets, + opts: opts, } e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{ - BucketSize: int64(maxNumBuckets), + BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]), NumColumns: int32(len(task.IndexInfo.Columns)), } - depth := int32(defaultCMSketchDepth) - width := int32(defaultCMSketchWidth) + depth := int32(opts[ast.AnalyzeOptCMSketchDepth]) + width := int32(opts[ast.AnalyzeOptCMSketchWidth]) e.analyzePB.IdxReq.CmsketchDepth = &depth e.analyzePB.IdxReq.CmsketchWidth = &width job := &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: autoAnalyze + "analyze index " + task.IndexInfo.Name.O} return &analyzeTask{taskType: idxTask, idxExec: e, job: job} } -func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeIndexTask, maxNumBuckets uint64) *analyzeTask { +func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64) *analyzeTask { h := domain.GetDomain(b.ctx).StatsHandle() statsTbl := h.GetPartitionStats(&model.TableInfo{}, task.PhysicalTableID) - analyzeTask := b.buildAnalyzeIndexPushdown(task, maxNumBuckets, "") + analyzeTask := b.buildAnalyzeIndexPushdown(task, opts, "") if statsTbl.Pseudo { return analyzeTask } @@ -1460,7 +1460,7 @@ func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeI return analyzeTask } -func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeColumnsTask, maxNumBuckets uint64, autoAnalyze string) *analyzeTask { +func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string) *analyzeTask { cols := task.ColsInfo if task.PKInfo != nil { cols = append([]*model.ColumnInfo{task.PKInfo}, cols...) @@ -1480,12 +1480,12 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo Flags: sc.PushDownFlags(), TimeZoneOffset: offset, }, - maxNumBuckets: maxNumBuckets, + opts: opts, } - depth := int32(defaultCMSketchDepth) - width := int32(defaultCMSketchWidth) + depth := int32(opts[ast.AnalyzeOptCMSketchDepth]) + width := int32(opts[ast.AnalyzeOptCMSketchWidth]) e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{ - BucketSize: int64(maxNumBuckets), + BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]), SampleSize: maxRegionSampleSize, SketchSize: maxSketchSize, ColumnsInfo: model.ColumnsToProto(cols, task.PKInfo != nil), @@ -1497,10 +1497,10 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo return &analyzeTask{taskType: colTask, colExec: e, job: job} } -func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColumnsTask, maxNumBuckets uint64) *analyzeTask { +func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64) *analyzeTask { h := domain.GetDomain(b.ctx).StatsHandle() statsTbl := h.GetPartitionStats(&model.TableInfo{}, task.PhysicalTableID) - analyzeTask := b.buildAnalyzeColumnsPushdown(task, maxNumBuckets, "") + analyzeTask := b.buildAnalyzeColumnsPushdown(task, opts, "") if statsTbl.Pseudo { return analyzeTask } @@ -1531,7 +1531,7 @@ func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColu return analyzeTask } -func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercore.AnalyzeColumnsTask, maxNumBuckets uint64) { +func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64) { findTask := false for _, eTask := range e.tasks { if eTask.fastExec.physicalTableID == task.PhysicalTableID { @@ -1553,7 +1553,7 @@ func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercor physicalTableID: task.PhysicalTableID, colsInfo: task.ColsInfo, pkInfo: task.PKInfo, - maxNumBuckets: maxNumBuckets, + opts: opts, tblInfo: task.TblInfo, concurrency: concurrency, wg: &sync.WaitGroup{}, @@ -1563,7 +1563,7 @@ func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercor } } -func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore.AnalyzeIndexTask, maxNumBuckets uint64) { +func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64) { findTask := false for _, eTask := range e.tasks { if eTask.fastExec.physicalTableID == task.PhysicalTableID { @@ -1584,7 +1584,7 @@ func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore ctx: b.ctx, physicalTableID: task.PhysicalTableID, idxsInfo: []*model.IndexInfo{task.IndexInfo}, - maxNumBuckets: maxNumBuckets, + opts: opts, tblInfo: task.TblInfo, concurrency: concurrency, wg: &sync.WaitGroup{}, @@ -1607,12 +1607,12 @@ func (b *executorBuilder) buildAnalyze(v *plannercore.Analyze) Executor { } for _, task := range v.ColTasks { if task.Incremental { - e.tasks = append(e.tasks, b.buildAnalyzePKIncremental(task, v.MaxNumBuckets)) + e.tasks = append(e.tasks, b.buildAnalyzePKIncremental(task, v.Opts)) } else { if enableFastAnalyze { - b.buildAnalyzeFastColumn(e, task, v.MaxNumBuckets) + b.buildAnalyzeFastColumn(e, task, v.Opts) } else { - e.tasks = append(e.tasks, b.buildAnalyzeColumnsPushdown(task, v.MaxNumBuckets, autoAnalyze)) + e.tasks = append(e.tasks, b.buildAnalyzeColumnsPushdown(task, v.Opts, autoAnalyze)) } } if b.err != nil { @@ -1621,12 +1621,12 @@ func (b *executorBuilder) buildAnalyze(v *plannercore.Analyze) Executor { } for _, task := range v.IdxTasks { if task.Incremental { - e.tasks = append(e.tasks, b.buildAnalyzeIndexIncremental(task, v.MaxNumBuckets)) + e.tasks = append(e.tasks, b.buildAnalyzeIndexIncremental(task, v.Opts)) } else { if enableFastAnalyze { - b.buildAnalyzeFastIndex(e, task, v.MaxNumBuckets) + b.buildAnalyzeFastIndex(e, task, v.Opts) } else { - e.tasks = append(e.tasks, b.buildAnalyzeIndexPushdown(task, v.MaxNumBuckets, autoAnalyze)) + e.tasks = append(e.tasks, b.buildAnalyzeIndexPushdown(task, v.Opts, autoAnalyze)) } } if b.err != nil { diff --git a/executor/executor_test.go b/executor/executor_test.go index b68bebd4bfefb..8ef824798dec1 100644 --- a/executor/executor_test.go +++ b/executor/executor_test.go @@ -2705,6 +2705,7 @@ func (s *testSuite1) SetUpSuite(c *C) { mockstore.WithHijackClient(hijackClient), ) c.Assert(err, IsNil) + session.SetStatsLease(0) s.dom, err = session.BootstrapSession(s.store) c.Assert(err, IsNil) s.dom.SetStatsUpdating(true) diff --git a/go.mod b/go.mod index 0fd738a958d1f..e83985680336c 100644 --- a/go.mod +++ b/go.mod @@ -76,3 +76,5 @@ require ( sourcegraph.com/sourcegraph/appdash v0.0.0-20180531100431-4c381bd170b4 sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) + +replace github.com/pingcap/parser => github.com/lamxTyler/parser v0.0.0-20190716065453-67df3e574a75 diff --git a/go.sum b/go.sum index 2bf2b6d4c859c..02653126f0240 100644 --- a/go.sum +++ b/go.sum @@ -12,6 +12,7 @@ github.com/blacktear23/go-proxyprotocol v0.0.0-20180807104634-af7a81e8dd0d/go.mo github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20171208011716-f6d7a1f6fbf3/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4 h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= @@ -114,6 +115,8 @@ github.com/kr/pty v1.0.0/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/lamxTyler/parser v0.0.0-20190716065453-67df3e574a75 h1:ZonBpY0Ip4OJXnddWa0lnLnJecx8cpxTUkLTlIYDBew= +github.com/lamxTyler/parser v0.0.0-20190716065453-67df3e574a75/go.mod h1:6c1rwSy9dUuNebYdr1IMI4+/sT3/Q65MXP2UCg7/vJI= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= @@ -163,8 +166,6 @@ github.com/pingcap/kvproto v0.0.0-20190703131923-d9830856b531/go.mod h1:QMdbTAXC github.com/pingcap/log v0.0.0-20190214045112-b37da76f67a7/go.mod h1:xsfkWVaFVV5B8e1K9seWfyJWFrIhbtUTAD8NV1Pq3+w= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw= -github.com/pingcap/parser v0.0.0-20190710072914-6cd203114f2d h1:vOZjn1ami1LIjtIj0i5QunGh/sHawbhiBCb1qPx373w= -github.com/pingcap/parser v0.0.0-20190710072914-6cd203114f2d/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b h1:oS9PftxQqgcRouKhhdaB52tXhVLEP7Ng3Qqsd6Z18iY= github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b/go.mod h1:3DlDlFT7EF64A1bmb/tulZb6wbPSagm5G4p1AlhaEDs= github.com/pingcap/tidb-tools v2.1.3-0.20190321065848-1e8b48f5c168+incompatible h1:MkWCxgZpJBgY2f4HtwWMMFzSBb3+JPzeJgF3VrXE/bU= @@ -191,6 +192,7 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFd github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7 h1:FUL3b97ZY2EPqg2NbXKuMHs5pXJB9hjj1fDHnF2vl28= github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44 h1:tB9NOR21++IjLyVx3/PCPhWMwqGNCMQEH96A6dMZ/gc= github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shirou/gopsutil v2.18.10+incompatible h1:cy84jW6EVRPa5g9HAHrlbxMSIjBhDSX0OFYyMYminYs= github.com/shirou/gopsutil v2.18.10+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= diff --git a/planner/core/common_plans.go b/planner/core/common_plans.go index 96a33507287f2..fc63e34432b86 100644 --- a/planner/core/common_plans.go +++ b/planner/core/common_plans.go @@ -489,9 +489,9 @@ type AnalyzeIndexTask struct { type Analyze struct { baseSchemaProducer - ColTasks []AnalyzeColumnsTask - IdxTasks []AnalyzeIndexTask - MaxNumBuckets uint64 + ColTasks []AnalyzeColumnsTask + IdxTasks []AnalyzeIndexTask + Opts map[ast.AnalyzeOptionType]uint64 } // LoadData represents a loaddata plan. diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index ebae4fc1b4dee..867cea64576ac 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -15,10 +15,10 @@ package core import ( "bytes" + "encoding/binary" "fmt" "strings" - "github.com/cznic/mathutil" "github.com/pingcap/errors" "github.com/pingcap/parser" "github.com/pingcap/parser/ast" @@ -819,7 +819,10 @@ func getPhysicalIDsAndPartitionNames(tblInfo *model.TableInfo, partitionNames [] } func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{MaxNumBuckets: as.MaxNumBuckets} + p := &Analyze{} + if err := handleAnalyzeOptions(p, as.AnalyzeOpts); err != nil { + return nil, err + } for _, tbl := range as.TableNames { if tbl.TableInfo.IsView() { return nil, errors.Errorf("analyze %s is not supported now.", tbl.Name.O) @@ -855,7 +858,10 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error) } func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{MaxNumBuckets: as.MaxNumBuckets} + p := &Analyze{} + if err := handleAnalyzeOptions(p, as.AnalyzeOpts); err != nil { + return nil, err + } tblInfo := as.TableNames[0].TableInfo physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames) if err != nil { @@ -883,7 +889,10 @@ func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) } func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{MaxNumBuckets: as.MaxNumBuckets} + p := &Analyze{} + if err := handleAnalyzeOptions(p, as.AnalyzeOpts); err != nil { + return nil, err + } tblInfo := as.TableNames[0].TableInfo physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames) if err != nil { @@ -907,10 +916,38 @@ func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, erro return p, nil } -const ( - defaultMaxNumBuckets = 256 - numBucketsLimit = 1024 -) +const cmSketchSizeLimit = (6 * 1024 * 1024) / binary.MaxVarintLen32 + +var analyzeOptionLimit = map[ast.AnalyzeOptionType]uint64{ + ast.AnalyzeOptNumBuckets: 1024, + ast.AnalyzeOptNumTopN: 1024, + ast.AnalyzeOptCMSketchWidth: cmSketchSizeLimit, + ast.AnalyzeOptCMSketchDepth: cmSketchSizeLimit, +} + +var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{ + ast.AnalyzeOptNumBuckets: 256, + ast.AnalyzeOptNumTopN: 20, + ast.AnalyzeOptCMSketchWidth: 2048, + ast.AnalyzeOptCMSketchDepth: 5, +} + +func handleAnalyzeOptions(p *Analyze, opts []ast.AnalyzeOpt) error { + p.Opts = make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault)) + for key, val := range analyzeOptionDefault { + p.Opts[key] = val + } + for _, opt := range opts { + if opt.Value == 0 || opt.Value > analyzeOptionLimit[opt.Type] { + return errors.Errorf("value of analyze option %s should be positive and not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) + } + p.Opts[opt.Type] = opt.Value + } + if p.Opts[ast.AnalyzeOptCMSketchWidth]*p.Opts[ast.AnalyzeOptCMSketchDepth] > cmSketchSizeLimit { + return errors.Errorf("cm sketch size(depth * width) should not larger than %d", cmSketchSizeLimit) + } + return nil +} func (b *PlanBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) { // If enable fast analyze, the storage must be tikv.Storage. @@ -927,11 +964,6 @@ func (b *PlanBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) { b.visitInfo = appendVisitInfo(b.visitInfo, mysql.InsertPriv, tbl.Schema.O, tbl.Name.O, "", insertErr) b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, tbl.Schema.O, tbl.Name.O, "", selectErr) } - if as.MaxNumBuckets == 0 { - as.MaxNumBuckets = defaultMaxNumBuckets - } else { - as.MaxNumBuckets = mathutil.MinUint64(as.MaxNumBuckets, numBucketsLimit) - } if as.IndexFlag { if len(as.IndexNames) == 0 { return b.buildAnalyzeAllIndex(as) From ead9a3e5e5704970073abcf732e9bfed6ca64930 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Tue, 23 Jul 2019 16:41:48 +0800 Subject: [PATCH 2/4] update parser --- go.mod | 2 -- go.sum | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index dd59712506e7e..5466cfc31ccdf 100644 --- a/go.mod +++ b/go.mod @@ -76,5 +76,3 @@ require ( sourcegraph.com/sourcegraph/appdash v0.0.0-20180531100431-4c381bd170b4 sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) - -replace github.com/pingcap/parser => github.com/lamxTyler/parser v0.0.0-20190716065453-67df3e574a75 diff --git a/go.sum b/go.sum index 02653126f0240..c3d7893b44c84 100644 --- a/go.sum +++ b/go.sum @@ -115,8 +115,6 @@ github.com/kr/pty v1.0.0/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/lamxTyler/parser v0.0.0-20190716065453-67df3e574a75 h1:ZonBpY0Ip4OJXnddWa0lnLnJecx8cpxTUkLTlIYDBew= -github.com/lamxTyler/parser v0.0.0-20190716065453-67df3e574a75/go.mod h1:6c1rwSy9dUuNebYdr1IMI4+/sT3/Q65MXP2UCg7/vJI= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= @@ -166,6 +164,8 @@ github.com/pingcap/kvproto v0.0.0-20190703131923-d9830856b531/go.mod h1:QMdbTAXC github.com/pingcap/log v0.0.0-20190214045112-b37da76f67a7/go.mod h1:xsfkWVaFVV5B8e1K9seWfyJWFrIhbtUTAD8NV1Pq3+w= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw= +github.com/pingcap/parser v0.0.0-20190723083556-57e1f3b7a1c1 h1:/L2n0wamoKiRlXOn7xCNk8ejgXJbjmC3X54pGYSgPvQ= +github.com/pingcap/parser v0.0.0-20190723083556-57e1f3b7a1c1/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b h1:oS9PftxQqgcRouKhhdaB52tXhVLEP7Ng3Qqsd6Z18iY= github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b/go.mod h1:3DlDlFT7EF64A1bmb/tulZb6wbPSagm5G4p1AlhaEDs= github.com/pingcap/tidb-tools v2.1.3-0.20190321065848-1e8b48f5c168+incompatible h1:MkWCxgZpJBgY2f4HtwWMMFzSBb3+JPzeJgF3VrXE/bU= From b488ce9c2f725cecfbb91b2274116b9db93afef2 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Tue, 23 Jul 2019 19:45:08 +0800 Subject: [PATCH 3/4] address comments --- executor/analyze_test.go | 2 +- planner/core/planbuilder.go | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 7b6ef0c511407..ce22e5b1b4732 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -123,7 +123,7 @@ func (s *testSuite1) TestAnalyzeParameters(c *C) { for i := 0; i < 20; i++ { tk.MustExec(fmt.Sprintf("insert into t values (%d)", i)) } - tk.MustExec(fmt.Sprintf("insert into t values (19), (19), (19)")) + tk.MustExec("insert into t values (19), (19), (19)") tk.MustExec("set @@tidb_enable_fast_analyze = 1") executor.MaxSampleSize = 30 diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 867cea64576ac..8c5301c2e9f64 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/ddl" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/planner/property" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" @@ -916,13 +917,13 @@ func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, erro return p, nil } -const cmSketchSizeLimit = (6 * 1024 * 1024) / binary.MaxVarintLen32 +var cmSketchSizeLimit = kv.TxnEntrySizeLimit / binary.MaxVarintLen32 var analyzeOptionLimit = map[ast.AnalyzeOptionType]uint64{ ast.AnalyzeOptNumBuckets: 1024, ast.AnalyzeOptNumTopN: 1024, - ast.AnalyzeOptCMSketchWidth: cmSketchSizeLimit, - ast.AnalyzeOptCMSketchDepth: cmSketchSizeLimit, + ast.AnalyzeOptCMSketchWidth: uint64(cmSketchSizeLimit), + ast.AnalyzeOptCMSketchDepth: uint64(cmSketchSizeLimit), } var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{ @@ -938,12 +939,18 @@ func handleAnalyzeOptions(p *Analyze, opts []ast.AnalyzeOpt) error { p.Opts[key] = val } for _, opt := range opts { - if opt.Value == 0 || opt.Value > analyzeOptionLimit[opt.Type] { - return errors.Errorf("value of analyze option %s should be positive and not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) + if opt.Type == ast.AnalyzeOptNumTopN { + if opt.Value > analyzeOptionLimit[opt.Type] { + return errors.Errorf("value of analyze option %s should not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) + } + } else { + if opt.Value == 0 || opt.Value > analyzeOptionLimit[opt.Type] { + return errors.Errorf("value of analyze option %s should be positive and not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) + } } p.Opts[opt.Type] = opt.Value } - if p.Opts[ast.AnalyzeOptCMSketchWidth]*p.Opts[ast.AnalyzeOptCMSketchDepth] > cmSketchSizeLimit { + if p.Opts[ast.AnalyzeOptCMSketchWidth]*p.Opts[ast.AnalyzeOptCMSketchDepth] > uint64(cmSketchSizeLimit) { return errors.Errorf("cm sketch size(depth * width) should not larger than %d", cmSketchSizeLimit) } return nil From ed012815496340de1fef9c57a858de51a07fb03a Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Wed, 24 Jul 2019 12:29:34 +0800 Subject: [PATCH 4/4] address comments --- planner/core/planbuilder.go | 49 +++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 8c5301c2e9f64..e230035854a38 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -819,11 +819,8 @@ func getPhysicalIDsAndPartitionNames(tblInfo *model.TableInfo, partitionNames [] return ids, names, nil } -func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{} - if err := handleAnalyzeOptions(p, as.AnalyzeOpts); err != nil { - return nil, err - } +func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64) (Plan, error) { + p := &Analyze{Opts: opts} for _, tbl := range as.TableNames { if tbl.TableInfo.IsView() { return nil, errors.Errorf("analyze %s is not supported now.", tbl.Name.O) @@ -858,11 +855,8 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error) return p, nil } -func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{} - if err := handleAnalyzeOptions(p, as.AnalyzeOpts); err != nil { - return nil, err - } +func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64) (Plan, error) { + p := &Analyze{Opts: opts} tblInfo := as.TableNames[0].TableInfo physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames) if err != nil { @@ -889,11 +883,8 @@ func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) return p, nil } -func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{} - if err := handleAnalyzeOptions(p, as.AnalyzeOpts); err != nil { - return nil, err - } +func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64) (Plan, error) { + p := &Analyze{Opts: opts} tblInfo := as.TableNames[0].TableInfo physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames) if err != nil { @@ -933,27 +924,27 @@ var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{ ast.AnalyzeOptCMSketchDepth: 5, } -func handleAnalyzeOptions(p *Analyze, opts []ast.AnalyzeOpt) error { - p.Opts = make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault)) +func handleAnalyzeOptions(opts []ast.AnalyzeOpt) (map[ast.AnalyzeOptionType]uint64, error) { + optMap := make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault)) for key, val := range analyzeOptionDefault { - p.Opts[key] = val + optMap[key] = val } for _, opt := range opts { if opt.Type == ast.AnalyzeOptNumTopN { if opt.Value > analyzeOptionLimit[opt.Type] { - return errors.Errorf("value of analyze option %s should not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) + return nil, errors.Errorf("value of analyze option %s should not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) } } else { if opt.Value == 0 || opt.Value > analyzeOptionLimit[opt.Type] { - return errors.Errorf("value of analyze option %s should be positive and not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) + return nil, errors.Errorf("value of analyze option %s should be positive and not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type]) } } - p.Opts[opt.Type] = opt.Value + optMap[opt.Type] = opt.Value } - if p.Opts[ast.AnalyzeOptCMSketchWidth]*p.Opts[ast.AnalyzeOptCMSketchDepth] > uint64(cmSketchSizeLimit) { - return errors.Errorf("cm sketch size(depth * width) should not larger than %d", cmSketchSizeLimit) + if optMap[ast.AnalyzeOptCMSketchWidth]*optMap[ast.AnalyzeOptCMSketchDepth] > uint64(cmSketchSizeLimit) { + return nil, errors.Errorf("cm sketch size(depth * width) should not larger than %d", cmSketchSizeLimit) } - return nil + return optMap, nil } func (b *PlanBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) { @@ -971,13 +962,17 @@ func (b *PlanBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) { b.visitInfo = appendVisitInfo(b.visitInfo, mysql.InsertPriv, tbl.Schema.O, tbl.Name.O, "", insertErr) b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, tbl.Schema.O, tbl.Name.O, "", selectErr) } + opts, err := handleAnalyzeOptions(as.AnalyzeOpts) + if err != nil { + return nil, err + } if as.IndexFlag { if len(as.IndexNames) == 0 { - return b.buildAnalyzeAllIndex(as) + return b.buildAnalyzeAllIndex(as, opts) } - return b.buildAnalyzeIndex(as) + return b.buildAnalyzeIndex(as, opts) } - return b.buildAnalyzeTable(as) + return b.buildAnalyzeTable(as, opts) } func buildShowNextRowID() *expression.Schema {