From 50541eddb281c87abbf968e583f3041367a0fcf0 Mon Sep 17 00:00:00 2001
From: disksing <i@disksing.com>
Date: Tue, 9 Apr 2024 10:34:19 +0800
Subject: [PATCH] resource_group: add retry configurations

Signed-off-by: disksing <i@disksing.com>
---
 client/resource_group/controller/config.go    | 16 ++++++++++++++
 .../resource_group/controller/controller.go   | 22 ++++++++++++++-----
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/client/resource_group/controller/config.go b/client/resource_group/controller/config.go
index ffc360c385c..a4176c073cc 100644
--- a/client/resource_group/controller/config.go
+++ b/client/resource_group/controller/config.go
@@ -52,6 +52,10 @@ const (
 	defaultTargetPeriod = 5 * time.Second
 	// defaultMaxWaitDuration is the max duration to wait for the token before throwing error.
 	defaultMaxWaitDuration = 30 * time.Second
+	// defaultWaitRetryTimes is the number of times to retry when waiting for the token.
+	defaultWaitRetryTimes = 10
+	// defaultWaitRetryInterval is the interval to retry when waiting for the token.
+	defaultWaitRetryInterval = 50 * time.Millisecond
 )
 
 const (
@@ -85,6 +89,12 @@ type Config struct {
 	// LTBMaxWaitDuration is the max wait time duration for local token bucket.
 	LTBMaxWaitDuration Duration `toml:"ltb-max-wait-duration" json:"ltb-max-wait-duration"`
 
+	// WaitRetryInterval is the interval to retry when waiting for the token.
+	WaitRetryInterval Duration `toml:"wait-retry-interval" json:"wait-retry-interval"`
+
+	// WaitRetryTimes is the number of times to retry when waiting for the token.
+	WaitRetryTimes int `toml:"wait-retry-times" json:"wait-retry-times"`
+
 	// RequestUnit is the configuration determines the coefficients of the RRU and WRU cost.
 	// This configuration should be modified carefully.
 	RequestUnit RequestUnitConfig `toml:"request-unit" json:"request-unit"`
@@ -98,6 +108,8 @@ func DefaultConfig() *Config {
 	return &Config{
 		DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration),
 		LTBMaxWaitDuration:       NewDuration(defaultMaxWaitDuration),
+		WaitRetryInterval:        NewDuration(defaultWaitRetryInterval),
+		WaitRetryTimes:           defaultWaitRetryTimes,
 		RequestUnit:              DefaultRequestUnitConfig(),
 		EnableControllerTraceLog: false,
 	}
@@ -155,6 +167,8 @@ type RUConfig struct {
 
 	// some config for client
 	LTBMaxWaitDuration       time.Duration
+	WaitRetryInterval        time.Duration
+	WaitRetryTimes           int
 	DegradedModeWaitDuration time.Duration
 }
 
@@ -176,6 +190,8 @@ func GenerateRUConfig(config *Config) *RUConfig {
 		WriteBytesCost:           RequestUnit(config.RequestUnit.WriteCostPerByte),
 		CPUMsCost:                RequestUnit(config.RequestUnit.CPUMsCost),
 		LTBMaxWaitDuration:       config.LTBMaxWaitDuration.Duration,
+		WaitRetryInterval:        config.WaitRetryInterval.Duration,
+		WaitRetryTimes:           config.WaitRetryTimes,
 		DegradedModeWaitDuration: config.DegradedModeWaitDuration.Duration,
 	}
 }
diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go
index a695aaf82bc..a3482df9286 100755
--- a/client/resource_group/controller/controller.go
+++ b/client/resource_group/controller/controller.go
@@ -39,8 +39,6 @@ import (
 
 const (
 	controllerConfigPath    = "resource_group/controller"
-	maxRetry                = 10
-	retryInterval           = 50 * time.Millisecond
 	maxNotificationChanLen  = 200
 	needTokensAmplification = 1.1
 	trickleReserveDuration  = 1250 * time.Millisecond
@@ -105,6 +103,20 @@ func WithMaxWaitDuration(d time.Duration) ResourceControlCreateOption {
 	}
 }
 
+// WithWaitRetryInterval is the option to set the interval between retries when waiting for the token.
+func WithWaitRetryInterval(d time.Duration) ResourceControlCreateOption {
+	return func(controller *ResourceGroupsController) {
+		controller.ruConfig.WaitRetryInterval = d
+	}
+}
+
+// WithWaitRetryTimes is the option to set the number of times to retry when waiting for the token.
+func WithWaitRetryTimes(times int) ResourceControlCreateOption {
+	return func(controller *ResourceGroupsController) {
+		controller.ruConfig.WaitRetryTimes = times
+	}
+}
+
 var _ ResourceGroupKVInterceptor = (*ResourceGroupsController)(nil)
 
 // ResourceGroupsController implements ResourceGroupKVInterceptor.
@@ -1206,7 +1218,7 @@ func (gc *groupCostController) onRequestWait(
 		var i int
 		var d time.Duration
 	retryLoop:
-		for i = 0; i < maxRetry; i++ {
+		for i = 0; i < gc.mainCfg.WaitRetryTimes; i++ {
 			switch gc.mode {
 			case rmpb.GroupMode_RawMode:
 				res := make([]*Reservation, 0, len(requestResourceLimitTypeList))
@@ -1230,8 +1242,8 @@ func (gc *groupCostController) onRequestWait(
 				}
 			}
 			gc.requestRetryCounter.Inc()
-			time.Sleep(retryInterval)
-			waitDuration += retryInterval
+			time.Sleep(gc.mainCfg.WaitRetryInterval)
+			waitDuration += gc.mainCfg.WaitRetryInterval
 		}
 		if err != nil {
 			gc.failedRequestCounter.Inc()