
Commit

fix: test cases failing for actuator and scaledown/eligibility
- abstract default values into `config`
Signed-off-by: vadasambar <[email protected]>
vadasambar committed Apr 11, 2023
1 parent 7fa229d commit acfd5a9
Showing 4 changed files with 81 additions and 19 deletions.
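
The commit applies one pattern throughout: scale-down defaults that were previously hard-coded in several places (10 and 20 minutes, 0.5 thresholds) become exported constants in the `config` package, and every consumer references the constant instead of repeating the literal. A minimal sketch of that pattern — the constant names, flag names, and description string come from the diff below, while the import path is assumed to be the usual cluster-autoscaler `config` package:

```go
package main

import (
	"flag"

	"k8s.io/autoscaler/cluster-autoscaler/config"
)

// Flag defaults now point at the shared constants rather than inline literals,
// so production code and tests read the same values and cannot drift apart.
var (
	scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time",
		config.DefaultScaleDownUnneededTime, // previously a hard-coded 10*time.Minute
		"How long a node should be unneeded before it is eligible for scale down")
	scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold",
		config.DefaultScaleDownUtilizationThreshold, // previously a hard-coded 0.5
		"Sum of cpu or memory of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down")
)
```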
10 changes: 10 additions & 0 deletions cluster-autoscaler/config/const.go
@@ -16,6 +16,8 @@ limitations under the License.

package config

import "time"

const (
// DefaultMaxClusterCores is the default maximum number of cores in the cluster.
DefaultMaxClusterCores = 5000 * 64
@@ -32,4 +34,12 @@ const (
DefaultScaleDownUnreadyTimeKey = "scaledownunreadytime"
// DefaultIgnoreDaemonSetsUtilizationKey identifies IgnoreDaemonSetsUtilization autoscaling option
DefaultIgnoreDaemonSetsUtilizationKey = "ignoredaemonsetsutilization"
// DefaultScaleDownUnneededTime is the default value of the ScaleDownUnneededTime autoscaling option
DefaultScaleDownUnneededTime = 10 * time.Minute
// DefaultScaleDownUnreadyTime is the default value of the ScaleDownUnreadyTime autoscaling option
DefaultScaleDownUnreadyTime = 20 * time.Minute
// DefaultScaleDownUtilizationThreshold is the default value of the ScaleDownUtilizationThreshold autoscaling option
DefaultScaleDownUtilizationThreshold = 0.5
// DefaultScaleDownGpuUtilizationThreshold is the default value of the ScaleDownGpuUtilizationThreshold autoscaling option
DefaultScaleDownGpuUtilizationThreshold = 0.5
)
@@ -241,7 +241,7 @@ func TestCropNodesToBudgets(t *testing.T) {
ndr.StartDeletionWithDrain("ng2", fmt.Sprintf("drain-node-%d", i))
}

actuator := NewActuator(ctx, nil, ndr, deleteOptions)
actuator := NewActuator(ctx, nil, ndr, deleteOptions, NewTestProcessors(ctx))
gotEmpty, gotDrain := actuator.cropNodesToBudgets(tc.emptyNodes, tc.drainNodes)
if diff := cmp.Diff(tc.wantEmpty, gotEmpty, cmpopts.EquateEmpty()); diff != "" {
t.Errorf("cropNodesToBudgets empty nodes diff (-want +got):\n%s", diff)
80 changes: 66 additions & 14 deletions cluster-autoscaler/core/scaledown/eligibility/eligibility_test.go
@@ -36,6 +36,15 @@ import (
"k8s.io/client-go/kubernetes/fake"
)

type testCase struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
ignoreDaemonSetsUtilization bool
}

func TestFilterOutUnremovable(t *testing.T) {
now := time.Now()

@@ -59,13 +68,7 @@ func TestFilterOutUnremovable(t *testing.T) {
smallPod := BuildTestPod("smallPod", 100, 0)
smallPod.Spec.NodeName = "regular"

testCases := []struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
}{
testCases := []testCase{
{
desc: "regular node stays",
nodes: []*apiv1.Node{regularNode},
@@ -111,14 +114,32 @@ func TestFilterOutUnremovable(t *testing.T) {
scaleDownUnready: false,
},
}

allTestCases := testCases

// run all test cases again with `IgnoreDaemonSetsUtilization` set to true
for _, tc := range testCases {
tcCopy := tc // shallow copy; avoids shadowing the outer *testing.T parameter t
tcCopy.ignoreDaemonSetsUtilization = true
allTestCases = append(allTestCases, tcCopy)
}

for _, tc := range allTestCases {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
c := NewChecker(&staticThresholdGetter{0.5})
s := staticNodeGroupConfigProcessor{}
c := NewChecker(&s)
options := config.AutoscalingOptions{
UnremovableNodeRecheckTimeout: 5 * time.Minute,
ScaleDownUnreadyEnabled: tc.scaleDownUnready,
NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: config.DefaultScaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: config.DefaultScaleDownGpuUtilizationThreshold,
ScaleDownUnneededTime: config.DefaultScaleDownUnneededTime,
ScaleDownUnreadyTime: config.DefaultScaleDownUnreadyTime,
IgnoreDaemonSetsUtilization: tc.ignoreDaemonSetsUtilization,
},
}
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 2)
@@ -137,14 +158,45 @@
}
}

type staticThresholdGetter struct {
threshold float64
// type staticThresholdGetter struct {
// threshold float64
// }

type staticNodeGroupConfigProcessor struct {
}

// func (s *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
// return s.threshold, nil
// }

// func (s *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
// return s.threshold, nil
// }

// GetScaleDownUnneededTime returns ScaleDownUnneededTime value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownUnneededTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
return context.NodeGroupDefaults.ScaleDownUnneededTime, nil
}

// GetScaleDownUnreadyTime returns ScaleDownUnreadyTime value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownUnreadyTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
return context.NodeGroupDefaults.ScaleDownUnreadyTime, nil
}

// GetScaleDownUtilizationThreshold returns ScaleDownUtilizationThreshold value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error) {
return context.NodeGroupDefaults.ScaleDownUtilizationThreshold, nil
}

func (s *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
return s.threshold, nil
// GetScaleDownGpuUtilizationThreshold returns ScaleDownGpuUtilizationThreshold value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error) {
return context.NodeGroupDefaults.ScaleDownGpuUtilizationThreshold, nil
}

func (s *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
return s.threshold, nil
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error) {
return context.NodeGroupDefaults.IgnoreDaemonSetsUtilization, nil
}

// CleanUp cleans up processor's internal structures.
func (s *staticNodeGroupConfigProcessor) CleanUp() {}
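
One optional hardening for a stub like this (not part of the commit; the import path and interface name are assumptions based on the cluster-autoscaler processors package): a compile-time assertion in the test file keeps the test double in sync with the `NodeGroupConfigProcessor` interface, so the test fails to build, rather than misbehave, if the interface grows.

```go
import "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"

// Compile-time check: breaks the build if staticNodeGroupConfigProcessor stops
// implementing NodeGroupConfigProcessor (for example, when another getter such
// as GetIgnoreDaemonSetsUtilization is added to the interface).
var _ nodegroupconfig.NodeGroupConfigProcessor = &staticNodeGroupConfigProcessor{}
```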
8 changes: 4 additions & 4 deletions cluster-autoscaler/main.go
@@ -107,13 +107,13 @@ var (
"How long after node deletion that scale down evaluation resumes, defaults to scanInterval")
scaleDownDelayAfterFailure = flag.Duration("scale-down-delay-after-failure", 3*time.Minute,
"How long after scale down failure that scale down evaluation resumes")
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", 10*time.Minute,
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", config.DefaultScaleDownUnneededTime,
"How long a node should be unneeded before it is eligible for scale down")
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", 20*time.Minute,
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", config.DefaultScaleDownUnreadyTime,
"How long an unready node should be unneeded before it is eligible for scale down")
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", 0.5,
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", config.DefaultScaleDownUtilizationThreshold,
"Sum of cpu or memory of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down")
scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", 0.5,
scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", config.DefaultScaleDownGpuUtilizationThreshold,
"Sum of gpu requests of all pods running on the node divided by node's allocatable resource, below which a node can be considered for scale down."+
"Utilization calculation only cares about gpu resource for accelerator node. cpu and memory utilization will be ignored.")
scaleDownNonEmptyCandidatesCount = flag.Int("scale-down-non-empty-candidates-count", 30,
