Lower the constants for the rate limiter in Job controller
mimowo committed Jun 16, 2023
1 parent c51a422 commit 74c5ff9
Showing 3 changed files with 27 additions and 23 deletions.
18 changes: 11 additions & 7 deletions pkg/controller/job/job_controller.go
@@ -69,10 +69,14 @@ const (
var controllerKind = batch.SchemeGroupVersion.WithKind("Job")

var (
- // DefaultJobBackOff is the default backoff period. Exported for tests.
- DefaultJobBackOff = 10 * time.Second
- // MaxJobBackOff is the max backoff period. Exported for tests.
- MaxJobBackOff = 360 * time.Second
+ // DefaultJobApiBackOff is the default backoff period. Exported for tests.
+ DefaultJobApiBackOff = 1 * time.Second
+ // MaxJobApiBackOff is the max backoff period. Exported for tests.
+ MaxJobApiBackOff = 60 * time.Second
+ // DefaultJobPodFailureBackOff is the default backoff period. Exported for tests.
+ DefaultJobPodFailureBackOff = 10 * time.Second
+ // MaxJobPodFailureBackOff is the max backoff period. Exported for tests.
+ MaxJobPodFailureBackOff = 360 * time.Second
// MaxUncountedPods is the maximum size the slices in
// .status.uncountedTerminatedPods should have to keep their representation
// roughly below 20 KB. Exported for tests
@@ -148,8 +152,8 @@ func newControllerWithClock(ctx context.Context, podInformer coreinformers.PodIn
},
expectations: controller.NewControllerExpectations(),
finalizerExpectations: newUIDTrackingExpectations(),
- queue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff)),
- orphanQueue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job_orphan_pod"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff)),
+ queue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff)),
+ orphanQueue: workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job_orphan_pod"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff)),
broadcaster: eventBroadcaster,
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
clock: clock,
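
For context (not part of the commit): both work queues above are built on an ItemExponentialFailureRateLimiter, so the lowered API backoff constants directly control how fast a Job key is retried after a failed sync. A minimal standalone sketch of the resulting delay sequence, assuming k8s.io/client-go/util/workqueue and a hypothetical queue key "default/my-job":

package main

import (
    "fmt"
    "time"

    "k8s.io/client-go/util/workqueue"
)

func main() {
    // Same limiter type the Job controller wires into its queues, seeded with
    // the new API backoff constants: 1s base delay, 60s cap.
    limiter := workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 60*time.Second)

    // Each consecutive failure of the same key doubles the delay until the cap:
    // 1s, 2s, 4s, 8s, 16s, 32s, 60s, 60s, ...
    for i := 0; i < 8; i++ {
        fmt.Println(limiter.When("default/my-job"))
    }

    // Forget resets the per-key failure count after a successful sync.
    limiter.Forget("default/my-job")
}

Before this commit the same queues started at 10s and capped at 360s (the old DefaultJobBackOff/MaxJobBackOff), values the commit now reserves for pod-failure backoff only.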
@@ -1436,7 +1440,7 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, activePods
}

if active < wantActive {
- remainingTime := newBackoffRecord.getRemainingTime(jm.clock, DefaultJobBackOff, MaxJobBackOff)
+ remainingTime := newBackoffRecord.getRemainingTime(jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff)
if remainingTime > 0 {
jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
return 0, metrics.JobSyncActionPodsCreated, nil
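The manageJob change above keeps the slower 10s/360s pair for backing off pod re-creation after pod failures. A rough, hypothetical sketch of the kind of computation getRemainingTime performs (the real backoffRecord bookkeeping lives elsewhere in this package; the function and parameter names below are made up for illustration):

package sketch

import "time"

// remainingPodFailureBackoff doubles the base delay for each consecutive pod
// failure, caps it at maxDelay, and subtracts the time already elapsed since
// the last failure. Hypothetical stand-in, not the controller's actual code.
func remainingPodFailureBackoff(failures int, lastFailure, now time.Time, baseDelay, maxDelay time.Duration) time.Duration {
    if failures == 0 {
        return 0
    }
    delay := baseDelay
    for i := 1; i < failures; i++ {
        delay *= 2
        if delay >= maxDelay {
            delay = maxDelay
            break
        }
    }
    if remaining := lastFailure.Add(delay).Sub(now); remaining > 0 {
        return remaining
    }
    return 0
}

With baseDelay = DefaultJobPodFailureBackOff (10s) and maxDelay = MaxJobPodFailureBackOff (360s) the delays grow as 10s, 20s, 40s, ... up to 6 minutes, unchanged from the pre-commit behavior.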
10 changes: 5 additions & 5 deletions pkg/controller/job/job_controller_test.go
@@ -3106,8 +3106,8 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
func TestSyncJobUpdateRequeue(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
- defer func() { DefaultJobBackOff = 10 * time.Second }()
- DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing
+ defer func() { DefaultJobApiBackOff = 1 * time.Second }()
+ DefaultJobApiBackOff = time.Duration(0) // overwrite the default value for testing
cases := map[string]struct {
updateErr error
wantRequeuedImmediately bool
@@ -3136,7 +3136,7 @@ func TestSyncJobUpdateRequeue(t *testing.T) {
sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
manager.queue.Add(testutil.GetKey(job, t))
manager.processNextWorkItem(context.TODO())
- // With DefaultJobBackOff=0, the queueing is synchronous.
+ // With DefaultJobApiBackOff=0, the queueing is synchronous.
requeued := manager.queue.Len() > 0
if requeued != tc.wantRequeuedImmediately {
t.Errorf("Unexpected requeue, got %t, want %t", requeued, tc.wantRequeuedImmediately)
@@ -3934,8 +3934,8 @@ func TestJobBackoffReset(t *testing.T) {

for name, tc := range testCases {
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
- defer func() { DefaultJobBackOff = 10 * time.Second }()
- DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing
+ defer func() { DefaultJobApiBackOff = 1 * time.Second }()
+ DefaultJobApiBackOff = time.Duration(0) // overwrite the default value for testing
manager, sharedInformerFactory := newControllerFromClient(ctx, clientset, controller.NoResyncPeriodFunc)
fakePodControl := controller.FakePodControl{}
manager.podControl = &fakePodControl
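Regarding the "queueing is synchronous" comment in the requeue test above: with a zero base delay the exponential limiter returns a 0s wait for every failure, so the failed key is re-added immediately and manager.queue.Len() can be checked right after processNextWorkItem returns. Reusing the limiter type, imports, and hypothetical key from the earlier sketch:

limiter := workqueue.NewItemExponentialFailureRateLimiter(0, 60*time.Second)
fmt.Println(limiter.When("default/my-job")) // 0s: the key is requeued without delay
fmt.Println(limiter.When("default/my-job")) // still 0s; zero doubled stays zero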
22 changes: 11 additions & 11 deletions test/integration/job/job_test.go
@@ -1379,9 +1379,9 @@ func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) {

func TestJobPodsCreatedWithExponentialBackoff(t *testing.T) {
// overwrite the default value for faster testing
- oldBackoff := jobcontroller.DefaultJobBackOff
- defer func() { jobcontroller.DefaultJobBackOff = oldBackoff }()
- jobcontroller.DefaultJobBackOff = 2 * time.Second
+ oldBackoff := jobcontroller.DefaultJobPodFailureBackOff
+ defer func() { jobcontroller.DefaultJobPodFailureBackOff = oldBackoff }()
+ jobcontroller.DefaultJobPodFailureBackOff = 2 * time.Second

closeFn, restConfig, clientSet, ns := setup(t, "simple")
defer closeFn()
@@ -1441,25 +1441,25 @@ func TestJobPodsCreatedWithExponentialBackoff(t *testing.T) {
return finishTime[i].Before(finishTime[j])
})

- if creationTime[1].Sub(finishTime[0]).Seconds() < jobcontroller.DefaultJobBackOff.Seconds() {
- t.Fatalf("Second pod should be created at least %v seconds after the first pod", jobcontroller.DefaultJobBackOff)
+ if creationTime[1].Sub(finishTime[0]).Seconds() < jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+ t.Fatalf("Second pod should be created at least %v seconds after the first pod", jobcontroller.DefaultJobPodFailureBackOff)
}

- if creationTime[1].Sub(finishTime[0]).Seconds() >= 2*jobcontroller.DefaultJobBackOff.Seconds() {
- t.Fatalf("Second pod should be created before %v seconds after the first pod", 2*jobcontroller.DefaultJobBackOff)
+ if creationTime[1].Sub(finishTime[0]).Seconds() >= 2*jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+ t.Fatalf("Second pod should be created before %v seconds after the first pod", 2*jobcontroller.DefaultJobPodFailureBackOff)
}

diff := creationTime[2].Sub(finishTime[1]).Seconds()

// The third pod should not be created before 4 seconds
- if diff < 2*jobcontroller.DefaultJobBackOff.Seconds() {
- t.Fatalf("Third pod should be created at least %v seconds after the second pod", 2*jobcontroller.DefaultJobBackOff)
+ if diff < 2*jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+ t.Fatalf("Third pod should be created at least %v seconds after the second pod", 2*jobcontroller.DefaultJobPodFailureBackOff)
}

// The third pod should be created within 8 seconds
// This check rules out double counting
- if diff >= 4*jobcontroller.DefaultJobBackOff.Seconds() {
- t.Fatalf("Third pod should be created before %v seconds after the second pod", 4*jobcontroller.DefaultJobBackOff)
+ if diff >= 4*jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+ t.Fatalf("Third pod should be created before %v seconds after the second pod", 4*jobcontroller.DefaultJobPodFailureBackOff)
}
}
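
A small worked example (hypothetical helper, not part of the test) of the window these assertions encode, assuming pod re-creation is delayed by DefaultJobPodFailureBackOff doubled per consecutive failure:

package sketch

import "time"

// expectedRecreationWindow returns the [lo, hi) interval the integration test
// allows between the previous pod finishing and its replacement being created,
// where failures is the number of pod failures seen so far.
func expectedRecreationWindow(failures int, baseDelay time.Duration) (lo, hi time.Duration) {
    // The n-th failure waits roughly baseDelay * 2^(n-1); allowing up to twice
    // that rules out double counting of failures.
    lo = baseDelay * time.Duration(1<<(failures-1))
    hi = baseDelay * time.Duration(1<<failures)
    return lo, hi
}

With the 2s override at the top of this test, the second pod must appear between 2s and 4s after the first finishes, and the third between 4s and 8s after the second, matching the bounds the Fatalf checks enforce.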
