
Commit

fix: test cases failing for actuator and scaledown/eligibility
- abstract default values into `config`
Signed-off-by: vadasambar <[email protected]>
vadasambar committed Apr 11, 2023
1 parent 7fa229d commit acfd5a9
Showing 4 changed files with 81 additions and 19 deletions.
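
The commit applies one pattern throughout: scale-down defaults that were previously hard-coded in several places (10 and 20 minutes, 0.5 thresholds) become exported constants in the `config` package, and every consumer references the constant instead of repeating the literal. A minimal sketch of that pattern — the constant names, flag names, and description string come from the diff below, while the import path is assumed to be the usual cluster-autoscaler `config` package:

```go
package main

import (
	"flag"

	"k8s.io/autoscaler/cluster-autoscaler/config"
)

// Flag defaults now point at the shared constants rather than inline literals,
// so production code and tests read the same values and cannot drift apart.
var (
	scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time",
		config.DefaultScaleDownUnneededTime, // previously a hard-coded 10*time.Minute
		"How long a node should be unneeded before it is eligible for scale down")
	scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold",
		config.DefaultScaleDownUtilizationThreshold, // previously a hard-coded 0.5
		"Sum of cpu or memory of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down")
)
```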
10 changes: 10 additions & 0 deletions cluster-autoscaler/config/const.go
@@ -16,6 +16,8 @@ limitations under the License.

package config

import "time"

const (
// DefaultMaxClusterCores is the default maximum number of cores in the cluster.
DefaultMaxClusterCores = 5000 * 64
@@ -32,4 +34,12 @@ const (
DefaultScaleDownUnreadyTimeKey = "scaledownunreadytime"
// DefaultIgnoreDaemonSetsUtilizationKey identifies IgnoreDaemonSetsUtilization autoscaling option
DefaultIgnoreDaemonSetsUtilizationKey = "ignoredaemonsetsutilization"
// DefaultScaleDownUnneededTime is the default value of the ScaleDownUnneededTime autoscaling option
DefaultScaleDownUnneededTime = 10 * time.Minute
// DefaultScaleDownUnreadyTime is the default value of the ScaleDownUnreadyTime autoscaling option
DefaultScaleDownUnreadyTime = 20 * time.Minute
// DefaultScaleDownUtilizationThreshold is the default value of the ScaleDownUtilizationThreshold autoscaling option
DefaultScaleDownUtilizationThreshold = 0.5
// DefaultScaleDownGpuUtilizationThreshold is the default value of the ScaleDownGpuUtilizationThreshold autoscaling option
DefaultScaleDownGpuUtilizationThreshold = 0.5
)
@@ -241,7 +241,7 @@ func TestCropNodesToBudgets(t *testing.T) {
ndr.StartDeletionWithDrain("ng2", fmt.Sprintf("drain-node-%d", i))
}

actuator := NewActuator(ctx, nil, ndr, deleteOptions)
actuator := NewActuator(ctx, nil, ndr, deleteOptions, NewTestProcessors(ctx))
gotEmpty, gotDrain := actuator.cropNodesToBudgets(tc.emptyNodes, tc.drainNodes)
if diff := cmp.Diff(tc.wantEmpty, gotEmpty, cmpopts.EquateEmpty()); diff != "" {
t.Errorf("cropNodesToBudgets empty nodes diff (-want +got):\n%s", diff)
80 changes: 66 additions & 14 deletions cluster-autoscaler/core/scaledown/eligibility/eligibility_test.go
@@ -36,6 +36,15 @@ import (
"k8s.io/client-go/kubernetes/fake"
)

type testCase struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
ignoreDaemonSetsUtilization bool
}

func TestFilterOutUnremovable(t *testing.T) {
now := time.Now()

@@ -59,13 +68,7 @@ func TestFilterOutUnremovable(t *testing.T) {
smallPod := BuildTestPod("smallPod", 100, 0)
smallPod.Spec.NodeName = "regular"

testCases := []struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
}{
testCases := []testCase{
{
desc: "regular node stays",
nodes: []*apiv1.Node{regularNode},
@@ -111,14 +114,32 @@ func TestFilterOutUnremovable(t *testing.T) {
scaleDownUnready: false,
},
}

allTestCases := testCases

// run all test cases again with `IgnoreDaemonSetsUtilization` set to true
for _, tc := range testCases {
tcCopy := tc // shallow copy; avoids shadowing the outer *testing.T parameter t
tcCopy.ignoreDaemonSetsUtilization = true
allTestCases = append(allTestCases, tcCopy)
}

for _, tc := range allTestCases {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
c := NewChecker(&staticThresholdGetter{0.5})
s := staticNodeGroupConfigProcessor{}
c := NewChecker(&s)
options := config.AutoscalingOptions{
UnremovableNodeRecheckTimeout: 5 * time.Minute,
ScaleDownUnreadyEnabled: tc.scaleDownUnready,
NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: config.DefaultScaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: config.DefaultScaleDownGpuUtilizationThreshold,
ScaleDownUnneededTime: config.DefaultScaleDownUnneededTime,
ScaleDownUnreadyTime: config.DefaultScaleDownUnreadyTime,
IgnoreDaemonSetsUtilization: tc.ignoreDaemonSetsUtilization,
},
}
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 2)
@@ -137,14 +158,45 @@
}
}

type staticThresholdGetter struct {
threshold float64
// type staticThresholdGetter struct {
// threshold float64
// }

type staticNodeGroupConfigProcessor struct {
}

// func (s *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
// return s.threshold, nil
// }

// func (s *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
// return s.threshold, nil
// }

// GetScaleDownUnneededTime returns ScaleDownUnneededTime value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownUnneededTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
return context.NodeGroupDefaults.ScaleDownUnneededTime, nil
}

// GetScaleDownUnreadyTime returns ScaleDownUnreadyTime value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownUnreadyTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
return context.NodeGroupDefaults.ScaleDownUnreadyTime, nil
}

// GetScaleDownUtilizationThreshold returns ScaleDownUtilizationThreshold value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error) {
return context.NodeGroupDefaults.ScaleDownUtilizationThreshold, nil
}

func (s *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
return s.threshold, nil
// GetScaleDownGpuUtilizationThreshold returns ScaleDownGpuUtilizationThreshold value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error) {
return context.NodeGroupDefaults.ScaleDownGpuUtilizationThreshold, nil
}

func (s *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
return s.threshold, nil
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
func (s *staticNodeGroupConfigProcessor) GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error) {
return context.NodeGroupDefaults.IgnoreDaemonSetsUtilization, nil
}

// CleanUp cleans up processor's internal structures.
func (s *staticNodeGroupConfigProcessor) CleanUp() {}
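
One optional hardening for a stub like this (not part of the commit; the import path and interface name are assumptions based on the cluster-autoscaler processors package): a compile-time assertion in the test file keeps the test double in sync with the `NodeGroupConfigProcessor` interface, so the test fails to build, rather than misbehave, if the interface grows.

```go
import "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"

// Compile-time check: breaks the build if staticNodeGroupConfigProcessor stops
// implementing NodeGroupConfigProcessor (for example, when another getter such
// as GetIgnoreDaemonSetsUtilization is added to the interface).
var _ nodegroupconfig.NodeGroupConfigProcessor = &staticNodeGroupConfigProcessor{}
```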
8 changes: 4 additions & 4 deletions cluster-autoscaler/main.go
@@ -107,13 +107,13 @@ var (
"How long after node deletion that scale down evaluation resumes, defaults to scanInterval")
scaleDownDelayAfterFailure = flag.Duration("scale-down-delay-after-failure", 3*time.Minute,
"How long after scale down failure that scale down evaluation resumes")
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", 10*time.Minute,
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", config.DefaultScaleDownUnneededTime,
"How long a node should be unneeded before it is eligible for scale down")
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", 20*time.Minute,
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", config.DefaultScaleDownUnreadyTime,
"How long an unready node should be unneeded before it is eligible for scale down")
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", 0.5,
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", config.DefaultScaleDownUtilizationThreshold,
"Sum of cpu or memory of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down")
scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", 0.5,
scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", config.DefaultScaleDownGpuUtilizationThreshold,
"Sum of gpu requests of all pods running on the node divided by node's allocatable resource, below which a node can be considered for scale down."+
"Utilization calculation only cares about gpu resource for accelerator node. cpu and memory utilization will be ignored.")
scaleDownNonEmptyCandidatesCount = flag.Int("scale-down-non-empty-candidates-count", 30,
