From 9388b9e6af464b4fdc798a33fc8b420b5167adfb Mon Sep 17 00:00:00 2001 From: joccau Date: Thu, 22 Aug 2024 15:58:04 +0800 Subject: [PATCH 1/9] limit the count of getting ddlhistory jobs Signed-off-by: joccau --- pkg/ddl/ddl_history.go | 8 ++++++++ pkg/ddl/ddl_history_test.go | 11 +++++++++++ pkg/server/handler/tikvhandler/tikv_handler.go | 18 +++++++++--------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/pkg/ddl/ddl_history.go b/pkg/ddl/ddl_history.go index ed3f494bc2270..dae07be99f039 100644 --- a/pkg/ddl/ddl_history.go +++ b/pkg/ddl/ddl_history.go @@ -37,6 +37,8 @@ import ( const ( DefNumHistoryJobs = 10 batchNumHistoryJobs = 128 + // GetDDLHistoryCountMax is the max count for getting the ddl history once. + GetDDLHistoryCountMax = 1024 ) // AddHistoryDDLJob record the history job. @@ -150,9 +152,15 @@ func GetAllHistoryDDLJobs(m *meta.Meta) ([]*model.Job, error) { func ScanHistoryDDLJobs(m *meta.Meta, startJobID int64, limit int) ([]*model.Job, error) { var iter meta.LastJobIterator var err error + if startJobID == 0 { + // if 'start_job_id' == 0 and 'limit' == 0(default value), get the 1024 ddl history job by defaultly. + if limit == 0 { + limit = GetDDLHistoryCountMax + } iter, err = m.GetLastHistoryDDLJobsIterator() } else { + // if 'start_job_id' > 0, it must set value to 'limit' if limit == 0 { return nil, errors.New("when 'start_job_id' is specified, it must work with a 'limit'") } diff --git a/pkg/ddl/ddl_history_test.go b/pkg/ddl/ddl_history_test.go index 8455960e841f6..e550d94e6109f 100644 --- a/pkg/ddl/ddl_history_test.go +++ b/pkg/ddl/ddl_history_test.go @@ -97,3 +97,14 @@ func TestDDLHistoryBasic(t *testing.T) { require.NoError(t, err) } + +func TestScanHistoryDDLJobsWithErrorLimit(t *testing.T) { + var ( + m = &meta.Meta{} + startJobID int64 = 10 + limit = 0 + ) + + _, err := ddl.ScanHistoryDDLJobs(m, startJobID, limit) + require.ErrorContains(t, err, "when 'start_job_id' is specified, it must work with a 'limit'") +} diff --git a/pkg/server/handler/tikvhandler/tikv_handler.go b/pkg/server/handler/tikvhandler/tikv_handler.go index 5ed22f113cc27..472cb91ece1f7 100644 --- a/pkg/server/handler/tikvhandler/tikv_handler.go +++ b/pkg/server/handler/tikvhandler/tikv_handler.go @@ -1125,8 +1125,11 @@ func (h *TableHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { // ServeHTTP handles request of ddl jobs history. func (h DDLHistoryJobHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { - var jobID, limitID int - var err error + var ( + jobID = 0 + limitID = 0 + err error + ) if jobValue := req.FormValue(handler.JobID); len(jobValue) > 0 { jobID, err = strconv.Atoi(jobValue) if err != nil { @@ -1144,8 +1147,9 @@ func (h DDLHistoryJobHandler) ServeHTTP(w http.ResponseWriter, req *http.Request handler.WriteError(w, err) return } - if limitID < 1 { - handler.WriteError(w, errors.New("ddl history limit must be greater than 0")) + if limitID < 1 || limitID > ddl.GetDDLHistoryCountMax { + handler.WriteError(w, + errors.Errorf("ddl history limit must be greater than 0 and less than or equal to %v", ddl.GetDDLHistoryCountMax)) return } } @@ -1165,11 +1169,7 @@ func (h DDLHistoryJobHandler) getHistoryDDL(jobID, limit int) (jobs []*model.Job } txnMeta := meta.NewMeta(txn) - if jobID == 0 && limit == 0 { - jobs, err = ddl.GetAllHistoryDDLJobs(txnMeta) - } else { - jobs, err = ddl.ScanHistoryDDLJobs(txnMeta, int64(jobID), limit) - } + jobs, err = ddl.ScanHistoryDDLJobs(txnMeta, int64(jobID), limit) if err != nil { return nil, errors.Trace(err) } From 9767d810955742185924b2e2c9cc51fcd4e83465 Mon Sep 17 00:00:00 2001 From: joccau Date: Thu, 22 Aug 2024 16:55:54 +0800 Subject: [PATCH 2/9] add unit test Signed-off-by: joccau --- pkg/ddl/ddl_history.go | 8 +++---- pkg/ddl/ddl_history_test.go | 24 ++++++++++++++++++- .../handler/tikvhandler/tikv_handler.go | 4 ++-- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/pkg/ddl/ddl_history.go b/pkg/ddl/ddl_history.go index dae07be99f039..03d22f2f6e146 100644 --- a/pkg/ddl/ddl_history.go +++ b/pkg/ddl/ddl_history.go @@ -37,8 +37,8 @@ import ( const ( DefNumHistoryJobs = 10 batchNumHistoryJobs = 128 - // GetDDLHistoryCountMax is the max count for getting the ddl history once. - GetDDLHistoryCountMax = 1024 + // DefNumGetDDLHistoryJobs is the max count for getting the ddl history once. + DefNumGetDDLHistoryJobs = 1024 ) // AddHistoryDDLJob record the history job. @@ -154,9 +154,9 @@ func ScanHistoryDDLJobs(m *meta.Meta, startJobID int64, limit int) ([]*model.Job var err error if startJobID == 0 { - // if 'start_job_id' == 0 and 'limit' == 0(default value), get the 1024 ddl history job by defaultly. + // if 'start_job_id' == 0 and 'limit' == 0(default value), get the last 1024 ddl history job by defaultly. if limit == 0 { - limit = GetDDLHistoryCountMax + limit = DefNumGetDDLHistoryJobs } iter, err = m.GetLastHistoryDDLJobsIterator() } else { diff --git a/pkg/ddl/ddl_history_test.go b/pkg/ddl/ddl_history_test.go index e550d94e6109f..90cdb03eb658a 100644 --- a/pkg/ddl/ddl_history_test.go +++ b/pkg/ddl/ddl_history_test.go @@ -33,6 +33,10 @@ import ( ) func TestDDLHistoryBasic(t *testing.T) { + var ( + DDLHistoryJobCount = 0 + ) + store := testkit.CreateMockStore(t) rs := pools.NewResourcePool(func() (pools.Resource, error) { newTk := testkit.NewTestKit(t, store) @@ -78,8 +82,9 @@ func TestDDLHistoryBasic(t *testing.T) { err = kv.RunInNewTxn(ctx, store, false, func(ctx context.Context, txn kv.Transaction) error { m := meta.NewMeta(txn) - _, err := ddl.GetAllHistoryDDLJobs(m) + jobs, err := ddl.GetAllHistoryDDLJobs(m) require.NoError(t, err) + DDLHistoryJobCount = len(jobs) return nil }) @@ -96,6 +101,23 @@ func TestDDLHistoryBasic(t *testing.T) { }) require.NoError(t, err) + + err = kv.RunInNewTxn(ctx, store, false, func(ctx context.Context, txn kv.Transaction) error { + m := meta.NewMeta(txn) + jobs, err := ddl.ScanHistoryDDLJobs(m, 0, 0) + require.NoError(t, err) + if DDLHistoryJobCount <= ddl.DefNumGetDDLHistoryJobs { + require.Equal(t, DDLHistoryJobCount, len(jobs)) + } else { + require.Equal(t, ddl.DefNumGetDDLHistoryJobs, len(jobs)) + } + require.True(t, len(jobs) > 2) + require.Equal(t, int64(2), jobs[DDLHistoryJobCount-2].ID) + require.Equal(t, int64(1), jobs[DDLHistoryJobCount-1].ID) + return nil + }) + + require.NoError(t, err) } func TestScanHistoryDDLJobsWithErrorLimit(t *testing.T) { diff --git a/pkg/server/handler/tikvhandler/tikv_handler.go b/pkg/server/handler/tikvhandler/tikv_handler.go index 472cb91ece1f7..ee8219f16edbb 100644 --- a/pkg/server/handler/tikvhandler/tikv_handler.go +++ b/pkg/server/handler/tikvhandler/tikv_handler.go @@ -1147,9 +1147,9 @@ func (h DDLHistoryJobHandler) ServeHTTP(w http.ResponseWriter, req *http.Request handler.WriteError(w, err) return } - if limitID < 1 || limitID > ddl.GetDDLHistoryCountMax { + if limitID < 1 || limitID > ddl.DefNumGetDDLHistoryJobs { handler.WriteError(w, - errors.Errorf("ddl history limit must be greater than 0 and less than or equal to %v", ddl.GetDDLHistoryCountMax)) + errors.Errorf("ddl history limit must be greater than 0 and less than or equal to %v", ddl.DefNumGetDDLHistoryJobs)) return } } From 7eaa70be938a39d48085d0e98284529a87de621a Mon Sep 17 00:00:00 2001 From: joccau Date: Mon, 26 Aug 2024 17:34:22 +0800 Subject: [PATCH 3/9] rename local variables Signed-off-by: joccau --- pkg/ddl/ddl_history_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/ddl/ddl_history_test.go b/pkg/ddl/ddl_history_test.go index 90cdb03eb658a..85f18d018bfc1 100644 --- a/pkg/ddl/ddl_history_test.go +++ b/pkg/ddl/ddl_history_test.go @@ -34,7 +34,7 @@ import ( func TestDDLHistoryBasic(t *testing.T) { var ( - DDLHistoryJobCount = 0 + ddlHistoryJobCount = 0 ) store := testkit.CreateMockStore(t) @@ -84,7 +84,7 @@ func TestDDLHistoryBasic(t *testing.T) { m := meta.NewMeta(txn) jobs, err := ddl.GetAllHistoryDDLJobs(m) require.NoError(t, err) - DDLHistoryJobCount = len(jobs) + ddlHistoryJobCount = len(jobs) return nil }) @@ -106,14 +106,14 @@ func TestDDLHistoryBasic(t *testing.T) { m := meta.NewMeta(txn) jobs, err := ddl.ScanHistoryDDLJobs(m, 0, 0) require.NoError(t, err) - if DDLHistoryJobCount <= ddl.DefNumGetDDLHistoryJobs { - require.Equal(t, DDLHistoryJobCount, len(jobs)) + if ddlHistoryJobCount <= ddl.DefNumGetDDLHistoryJobs { + require.Equal(t, ddlHistoryJobCount, len(jobs)) } else { require.Equal(t, ddl.DefNumGetDDLHistoryJobs, len(jobs)) } require.True(t, len(jobs) > 2) - require.Equal(t, int64(2), jobs[DDLHistoryJobCount-2].ID) - require.Equal(t, int64(1), jobs[DDLHistoryJobCount-1].ID) + require.Equal(t, int64(2), jobs[ddlHistoryJobCount-2].ID) + require.Equal(t, int64(1), jobs[ddlHistoryJobCount-1].ID) return nil }) From 61b4dcd1ccfb613fd010c7f124ba2528eefdf6c7 Mon Sep 17 00:00:00 2001 From: joccau Date: Tue, 27 Aug 2024 21:18:25 +0800 Subject: [PATCH 4/9] maintain test case Signed-off-by: joccau --- pkg/server/handler/tests/http_handler_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/server/handler/tests/http_handler_test.go b/pkg/server/handler/tests/http_handler_test.go index 6ce3a50acfcdd..06fafdc762116 100644 --- a/pkg/server/handler/tests/http_handler_test.go +++ b/pkg/server/handler/tests/http_handler_test.go @@ -16,6 +16,7 @@ package tests import ( "bytes" + "cmp" "context" "crypto/tls" "crypto/x509" @@ -1063,6 +1064,11 @@ func TestAllHistory(t *testing.T) { data, err := ddl.GetAllHistoryDDLJobs(txnMeta) require.NoError(t, err) err = decoder.Decode(&jobs) + require.True(t, len(jobs) < ddl.DefNumGetDDLHistoryJobs) + // sort job. + slices.SortFunc(jobs, func(i, j *model.Job) int { + return cmp.Compare(i.ID, j.ID) + }) require.NoError(t, err) require.NoError(t, resp.Body.Close()) From f3b50842c8205510c48e27bba7636fbf16ca6466 Mon Sep 17 00:00:00 2001 From: joccau Date: Wed, 28 Aug 2024 10:43:59 +0800 Subject: [PATCH 5/9] update document about tidb https Signed-off-by: joccau --- docs/tidb_http_api.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tidb_http_api.md b/docs/tidb_http_api.md index 4de2901c3925d..3e8b365f649cd 100644 --- a/docs/tidb_http_api.md +++ b/docs/tidb_http_api.md @@ -565,13 +565,13 @@ timezone.* **Note**: If you request a TiDB that is not ddl owner, the response will be `This node is not a ddl owner, can't be resigned.` -25. Get all TiDB DDL job history information. +25. Get the TiDB DDL job history information. ```shell curl http://{TiDBIP}:10080/ddl/history ``` - **Note**: When the DDL history is very very long, it may consume a lot memory and even cause OOM. Consider adding `start_job_id` and `limit`. + **Note**: When the DDL history is very very long, it may contains too many jobs. This interface will get 1024 history ddl jobs by default. If you want get more jobs, consider adding `start_job_id` and `limit`. 26. Get count {number} TiDB DDL job history information. From b5dd8c2f22868d0b507955e0198e459893be1afa Mon Sep 17 00:00:00 2001 From: joccau Date: Wed, 28 Aug 2024 12:14:56 +0800 Subject: [PATCH 6/9] update default from 1024 to 2048 Signed-off-by: joccau --- docs/tidb_http_api.md | 2 +- pkg/ddl/ddl_history.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tidb_http_api.md b/docs/tidb_http_api.md index 3e8b365f649cd..d02bc17cb226e 100644 --- a/docs/tidb_http_api.md +++ b/docs/tidb_http_api.md @@ -571,7 +571,7 @@ timezone.* curl http://{TiDBIP}:10080/ddl/history ``` - **Note**: When the DDL history is very very long, it may contains too many jobs. This interface will get 1024 history ddl jobs by default. If you want get more jobs, consider adding `start_job_id` and `limit`. + **Note**: When the DDL history is very very long, it may contains too many jobs. This interface will get a maximum of 2048 history ddl jobs by default. If you want get more jobs, consider adding `start_job_id` and `limit`. 26. Get count {number} TiDB DDL job history information. diff --git a/pkg/ddl/ddl_history.go b/pkg/ddl/ddl_history.go index 03d22f2f6e146..340e1366b864e 100644 --- a/pkg/ddl/ddl_history.go +++ b/pkg/ddl/ddl_history.go @@ -38,7 +38,7 @@ const ( DefNumHistoryJobs = 10 batchNumHistoryJobs = 128 // DefNumGetDDLHistoryJobs is the max count for getting the ddl history once. - DefNumGetDDLHistoryJobs = 1024 + DefNumGetDDLHistoryJobs = 2048 ) // AddHistoryDDLJob record the history job. From 8c24cec92e10e3c53aff972994298420c48867a8 Mon Sep 17 00:00:00 2001 From: joccau Date: Wed, 28 Aug 2024 12:35:55 +0800 Subject: [PATCH 7/9] update test with failpoint Signed-off-by: joccau --- pkg/ddl/ddl_history.go | 12 ++++++++++++ pkg/ddl/ddl_history_test.go | 10 ++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pkg/ddl/ddl_history.go b/pkg/ddl/ddl_history.go index 340e1366b864e..cc78783832740 100644 --- a/pkg/ddl/ddl_history.go +++ b/pkg/ddl/ddl_history.go @@ -22,6 +22,7 @@ import ( "strconv" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/ddl/logutil" sess "github.com/pingcap/tidb/pkg/ddl/session" "github.com/pingcap/tidb/pkg/ddl/util" @@ -146,6 +147,8 @@ func GetAllHistoryDDLJobs(m *meta.Meta) ([]*model.Job, error) { return allJobs, nil } + + // ScanHistoryDDLJobs get some of the done DDL jobs. // When the DDL history is quite large, GetAllHistoryDDLJobs() API can't work well, because it makes the server OOM. // The result is in descending order by job ID. @@ -157,6 +160,14 @@ func ScanHistoryDDLJobs(m *meta.Meta, startJobID int64, limit int) ([]*model.Job // if 'start_job_id' == 0 and 'limit' == 0(default value), get the last 1024 ddl history job by defaultly. if limit == 0 { limit = DefNumGetDDLHistoryJobs + + failpoint.Inject("history-ddl-jobs-limit", func(val failpoint.Value) { + injectLimit, ok := val.(int) + if ok { + logutil.DDLLogger().Info("failpoint history-ddl-jobs-limit", zap.Int("limit", injectLimit)) + limit = injectLimit + } + }) } iter, err = m.GetLastHistoryDDLJobsIterator() } else { @@ -169,5 +180,6 @@ func ScanHistoryDDLJobs(m *meta.Meta, startJobID int64, limit int) ([]*model.Job if err != nil { return nil, errors.Trace(err) } + return iter.GetLastJobs(limit, nil) } diff --git a/pkg/ddl/ddl_history_test.go b/pkg/ddl/ddl_history_test.go index 85f18d018bfc1..d47dcc7b0cf24 100644 --- a/pkg/ddl/ddl_history_test.go +++ b/pkg/ddl/ddl_history_test.go @@ -23,6 +23,7 @@ import ( "testing" "github.com/ngaut/pools" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/ddl/session" "github.com/pingcap/tidb/pkg/kv" @@ -102,14 +103,19 @@ func TestDDLHistoryBasic(t *testing.T) { require.NoError(t, err) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/history-ddl-jobs-limit", "return(128)")) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/ddl/history-ddl-jobs-limit")) + }() + err = kv.RunInNewTxn(ctx, store, false, func(ctx context.Context, txn kv.Transaction) error { m := meta.NewMeta(txn) jobs, err := ddl.ScanHistoryDDLJobs(m, 0, 0) require.NoError(t, err) - if ddlHistoryJobCount <= ddl.DefNumGetDDLHistoryJobs { + if ddlHistoryJobCount <= 128 { require.Equal(t, ddlHistoryJobCount, len(jobs)) } else { - require.Equal(t, ddl.DefNumGetDDLHistoryJobs, len(jobs)) + require.Equal(t, 128, len(jobs)) } require.True(t, len(jobs) > 2) require.Equal(t, int64(2), jobs[ddlHistoryJobCount-2].ID) From c03cc990ddc4763b53119039671aaa22812c02e1 Mon Sep 17 00:00:00 2001 From: joccau Date: Wed, 28 Aug 2024 12:39:37 +0800 Subject: [PATCH 8/9] fix typo Signed-off-by: joccau --- docs/tidb_http_api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tidb_http_api.md b/docs/tidb_http_api.md index d02bc17cb226e..10402354706a4 100644 --- a/docs/tidb_http_api.md +++ b/docs/tidb_http_api.md @@ -571,7 +571,7 @@ timezone.* curl http://{TiDBIP}:10080/ddl/history ``` - **Note**: When the DDL history is very very long, it may contains too many jobs. This interface will get a maximum of 2048 history ddl jobs by default. If you want get more jobs, consider adding `start_job_id` and `limit`. + **Note**: When the DDL history is very very long, system table may containg too many jobs. This interface will get a maximum of 2048 history ddl jobs by default. If you want get more jobs, consider adding `start_job_id` and `limit`. 26. Get count {number} TiDB DDL job history information. From c0ed27f665e66c8b183c492875b8ffa07ed590b3 Mon Sep 17 00:00:00 2001 From: joccau Date: Wed, 28 Aug 2024 15:03:21 +0800 Subject: [PATCH 9/9] make fmt Signed-off-by: joccau --- pkg/ddl/ddl_history.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/ddl/ddl_history.go b/pkg/ddl/ddl_history.go index cc78783832740..1455f39a2db76 100644 --- a/pkg/ddl/ddl_history.go +++ b/pkg/ddl/ddl_history.go @@ -147,8 +147,6 @@ func GetAllHistoryDDLJobs(m *meta.Meta) ([]*model.Job, error) { return allJobs, nil } - - // ScanHistoryDDLJobs get some of the done DDL jobs. // When the DDL history is quite large, GetAllHistoryDDLJobs() API can't work well, because it makes the server OOM. // The result is in descending order by job ID.