From 74c5ff97f187813ee6aad21769e99a42b079243c Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Fri, 16 Jun 2023 16:04:28 +0200 Subject: [PATCH] Lower the constants for the rate limiter in Job controller --- pkg/controller/job/job_controller.go | 18 +++++++++++------- pkg/controller/job/job_controller_test.go | 10 +++++----- test/integration/job/job_test.go | 22 +++++++++++----------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index aad4c665dfb72..46b41aec3073d 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -69,10 +69,14 @@ const ( var controllerKind = batch.SchemeGroupVersion.WithKind("Job") var ( - // DefaultJobBackOff is the default backoff period. Exported for tests. - DefaultJobBackOff = 10 * time.Second - // MaxJobBackOff is the max backoff period. Exported for tests. - MaxJobBackOff = 360 * time.Second + // DefaultJobApiBackOff is the default backoff period. Exported for tests. + DefaultJobApiBackOff = 1 * time.Second + // MaxJobApiBackOff is the max backoff period. Exported for tests. + MaxJobApiBackOff = 60 * time.Second + // DefaultJobPodFailureBackOff is the default backoff period. Exported for tests. + DefaultJobPodFailureBackOff = 10 * time.Second + // MaxJobPodFailureBackOff is the max backoff period. Exported for tests. + MaxJobPodFailureBackOff = 360 * time.Second // MaxUncountedPods is the maximum size the slices in // .status.uncountedTerminatedPods should have to keep their representation // roughly below 20 KB. Exported for tests @@ -148,8 +152,8 @@ func newControllerWithClock(ctx context.Context, podInformer coreinformers.PodIn }, expectations: controller.NewControllerExpectations(), finalizerExpectations: newUIDTrackingExpectations(), - queue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff)), - orphanQueue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job_orphan_pod"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff)), + queue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff)), + orphanQueue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job_orphan_pod"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff)), broadcaster: eventBroadcaster, recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}), clock: clock, @@ -1436,7 +1440,7 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, activePods } if active < wantActive { - remainingTime := newBackoffRecord.getRemainingTime(jm.clock, DefaultJobBackOff, MaxJobBackOff) + remainingTime := newBackoffRecord.getRemainingTime(jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff) if remainingTime > 0 { jm.enqueueSyncJobWithDelay(logger, job, remainingTime) return 0, metrics.JobSyncActionPodsCreated, nil diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index 2431eb0d23ced..17c738122d629 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -3106,8 +3106,8 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) { func TestSyncJobUpdateRequeue(t *testing.T) { _, ctx := ktesting.NewTestContext(t) clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}) - defer func() { DefaultJobBackOff = 10 * time.Second }() - DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing + defer func() { DefaultJobApiBackOff = 1 * time.Second }() + DefaultJobApiBackOff = time.Duration(0) // overwrite the default value for testing cases := map[string]struct { updateErr error wantRequeuedImmediately bool @@ -3136,7 +3136,7 @@ func TestSyncJobUpdateRequeue(t *testing.T) { sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job) manager.queue.Add(testutil.GetKey(job, t)) manager.processNextWorkItem(context.TODO()) - // With DefaultJobBackOff=0, the queueing is synchronous. + // With DefaultJobApiBackOff=0, the queueing is synchronous. requeued := manager.queue.Len() > 0 if requeued != tc.wantRequeuedImmediately { t.Errorf("Unexpected requeue, got %t, want %t", requeued, tc.wantRequeuedImmediately) @@ -3934,8 +3934,8 @@ func TestJobBackoffReset(t *testing.T) { for name, tc := range testCases { clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}) - defer func() { DefaultJobBackOff = 10 * time.Second }() - DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing + defer func() { DefaultJobApiBackOff = 1 * time.Second }() + DefaultJobApiBackOff = time.Duration(0) // overwrite the default value for testing manager, sharedInformerFactory := newControllerFromClient(ctx, clientset, controller.NoResyncPeriodFunc) fakePodControl := controller.FakePodControl{} manager.podControl = &fakePodControl diff --git a/test/integration/job/job_test.go b/test/integration/job/job_test.go index 999e58b4ac7bc..532325e7178ca 100644 --- a/test/integration/job/job_test.go +++ b/test/integration/job/job_test.go @@ -1379,9 +1379,9 @@ func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) { func TestJobPodsCreatedWithExponentialBackoff(t *testing.T) { // overwrite the default value for faster testing - oldBackoff := jobcontroller.DefaultJobBackOff - defer func() { jobcontroller.DefaultJobBackOff = oldBackoff }() - jobcontroller.DefaultJobBackOff = 2 * time.Second + oldBackoff := jobcontroller.DefaultJobPodFailureBackOff + defer func() { jobcontroller.DefaultJobPodFailureBackOff = oldBackoff }() + jobcontroller.DefaultJobPodFailureBackOff = 2 * time.Second closeFn, restConfig, clientSet, ns := setup(t, "simple") defer closeFn() @@ -1441,25 +1441,25 @@ func TestJobPodsCreatedWithExponentialBackoff(t *testing.T) { return finishTime[i].Before(finishTime[j]) }) - if creationTime[1].Sub(finishTime[0]).Seconds() < jobcontroller.DefaultJobBackOff.Seconds() { - t.Fatalf("Second pod should be created at least %v seconds after the first pod", jobcontroller.DefaultJobBackOff) + if creationTime[1].Sub(finishTime[0]).Seconds() < jobcontroller.DefaultJobPodFailureBackOff.Seconds() { + t.Fatalf("Second pod should be created at least %v seconds after the first pod", jobcontroller.DefaultJobPodFailureBackOff) } - if creationTime[1].Sub(finishTime[0]).Seconds() >= 2*jobcontroller.DefaultJobBackOff.Seconds() { - t.Fatalf("Second pod should be created before %v seconds after the first pod", 2*jobcontroller.DefaultJobBackOff) + if creationTime[1].Sub(finishTime[0]).Seconds() >= 2*jobcontroller.DefaultJobPodFailureBackOff.Seconds() { + t.Fatalf("Second pod should be created before %v seconds after the first pod", 2*jobcontroller.DefaultJobPodFailureBackOff) } diff := creationTime[2].Sub(finishTime[1]).Seconds() // The third pod should not be created before 4 seconds - if diff < 2*jobcontroller.DefaultJobBackOff.Seconds() { - t.Fatalf("Third pod should be created at least %v seconds after the second pod", 2*jobcontroller.DefaultJobBackOff) + if diff < 2*jobcontroller.DefaultJobPodFailureBackOff.Seconds() { + t.Fatalf("Third pod should be created at least %v seconds after the second pod", 2*jobcontroller.DefaultJobPodFailureBackOff) } // The third pod should be created within 8 seconds // This check rules out double counting - if diff >= 4*jobcontroller.DefaultJobBackOff.Seconds() { - t.Fatalf("Third pod should be created before %v seconds after the second pod", 4*jobcontroller.DefaultJobBackOff) + if diff >= 4*jobcontroller.DefaultJobPodFailureBackOff.Seconds() { + t.Fatalf("Third pod should be created before %v seconds after the second pod", 4*jobcontroller.DefaultJobPodFailureBackOff) } }