Flag for exclusive usage of template infos (with warnings)
This is a second attempt at kubernetes#1021 (it might also provide an alternate
solution for kubernetes#2892 and kubernetes#3608).

Per the kubernetes#1021 discussion, a flag might be acceptable if it defaults
to false and documents both the limitations (not supported by all cloud
providers) and the risks of using it (loss of accuracy, upscaling
unusable nodes, or leaving pods pending).

Some use cases include:
* Balancing similar node groups when upscaling from zero (but I believe kubernetes#3608 is a better approach)
* Edited/updated ASG/MIG taints and labels
* Updated instance types

Per previous discussion, the latter two cases could be covered in
the long term by custom node-shape discovery Processors (open to
discussing that option too).
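
For context, here is a rough sketch of what such a node-shape discovery Processor could look like. The interface, type names, and the NodeInfo placeholder are assumptions made for illustration only, not the project's actual Processor API.

// Hypothetical sketch only: names and shapes below are assumptions for
// illustration, not cluster-autoscaler's actual Processor API.
package nodeshape

// NodeInfo stands in for the scheduler framework's NodeInfo type.
type NodeInfo struct {
	Labels   map[string]string
	Taints   []string
	CPUMilli int64
	MemBytes int64
}

// NodeShapeProcessor would let an out-of-tree component refresh the per-group
// templates (covering cases such as edited ASG/MIG taints and labels, or an
// updated instance type) instead of relying on a possibly stale real-world node.
type NodeShapeProcessor interface {
	// Process receives the templates keyed by node group id and may replace
	// any of them with freshly discovered node shapes.
	Process(nodeInfos map[string]*NodeInfo) (map[string]*NodeInfo, error)
	// CleanUp releases any resources held by the processor.
	CleanUp()
}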
bpineau committed Oct 13, 2020
1 parent 4043b6e commit 91e126a
Showing 6 changed files with 58 additions and 15 deletions.
4 changes: 4 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
@@ -145,4 +145,8 @@ type AutoscalingOptions struct {
// ClusterAPICloudConfigAuthoritative tells the Cluster API provider to treat the CloudConfig option as authoritative and
// not use KubeConfigPath as a fallback when it is not provided.
ClusterAPICloudConfigAuthoritative bool
// ScaleUpTemplateFromCloudProvider tells cluster-autoscaler to always use the cloud provider's node group (ASG, MIG, VMSS...)
// templates rather than templates built from real-world nodes. Warning: this isn't supported by all providers, provides less
// accurate information than real-world nodes, and can lead to wrong upscale decisions.
ScaleUpTemplateFromCloudProvider bool
}
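
A side note on how the new field is consumed: the call in static_autoscaler.go further down reads it as autoscalingContext.ScaleUpTemplateFromCloudProvider, which presumably works because the autoscaling context embeds the options struct. A minimal, self-contained sketch of that field-promotion pattern, using simplified stand-in types rather than the real ones:

package main

import "fmt"

// Simplified stand-ins for config.AutoscalingOptions and context.AutoscalingContext;
// only the embedding/field-promotion pattern is the point of this sketch.
type AutoscalingOptions struct {
	ScaleUpTemplateFromCloudProvider bool
}

type AutoscalingContext struct {
	AutoscalingOptions // embedded, so its fields are promoted onto the context
}

func main() {
	ctx := AutoscalingContext{AutoscalingOptions{ScaleUpTemplateFromCloudProvider: true}}
	// The promoted field is reachable directly on the context.
	fmt.Println(ctx.ScaleUpTemplateFromCloudProvider) // prints: true
}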
12 changes: 6 additions & 6 deletions cluster-autoscaler/core/scale_up_test.go
@@ -522,7 +522,7 @@ func runSimpleScaleUpTest(t *testing.T, config *scaleTestConfig) *scaleTestResul
}
context.ExpanderStrategy = expander

nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil)
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil, false)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff())
clusterState.UpdateNodes(nodes, nodeInfos, time.Now())

@@ -681,7 +681,7 @@ func TestScaleUpUnhealthy(t *testing.T) {
assert.NoError(t, err)

nodes := []*apiv1.Node{n1, n2}
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil)
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil, false)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff())
clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
p3 := BuildTestPod("p-new", 550, 0)
@@ -721,7 +721,7 @@ func TestScaleUpNoHelp(t *testing.T) {
assert.NoError(t, err)

nodes := []*apiv1.Node{n1}
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil)
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil, false)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff())
clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
p3 := BuildTestPod("p-new", 500, 0)
@@ -786,7 +786,7 @@ func TestScaleUpBalanceGroups(t *testing.T) {
context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil)
assert.NoError(t, err)

nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil)
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, listers, []*appsv1.DaemonSet{}, context.PredicateChecker, nil, false)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff())
clusterState.UpdateNodes(nodes, nodeInfos, time.Now())

@@ -854,7 +854,7 @@ func TestScaleUpAutoprovisionedNodeGroup(t *testing.T) {
processors.NodeGroupManager = &mockAutoprovisioningNodeGroupManager{t, 0}

nodes := []*apiv1.Node{}
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, context.ListerRegistry, []*appsv1.DaemonSet{}, context.PredicateChecker, nil)
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, context.ListerRegistry, []*appsv1.DaemonSet{}, context.PredicateChecker, nil, false)

scaleUpStatus, err := ScaleUp(&context, processors, clusterState, []*apiv1.Pod{p1}, nodes, []*appsv1.DaemonSet{}, nodeInfos, nil)
assert.NoError(t, err)
@@ -907,7 +907,7 @@ func TestScaleUpBalanceAutoprovisionedNodeGroups(t *testing.T) {
processors.NodeGroupManager = &mockAutoprovisioningNodeGroupManager{t, 2}

nodes := []*apiv1.Node{}
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, context.ListerRegistry, []*appsv1.DaemonSet{}, context.PredicateChecker, nil)
nodeInfos, _ := utils.GetNodeInfosForGroups(nodes, nil, provider, context.ListerRegistry, []*appsv1.DaemonSet{}, context.PredicateChecker, nil, false)

scaleUpStatus, err := ScaleUp(&context, processors, clusterState, []*apiv1.Pod{p1, p2, p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos, nil)
assert.NoError(t, err)
10 changes: 9 additions & 1 deletion cluster-autoscaler/core/static_autoscaler.go
@@ -266,7 +266,15 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
}

nodeInfosForGroups, autoscalerError := core_utils.GetNodeInfosForGroups(
readyNodes, a.nodeInfoCache, autoscalingContext.CloudProvider, autoscalingContext.ListerRegistry, daemonsets, autoscalingContext.PredicateChecker, a.ignoredTaints)
readyNodes,
a.nodeInfoCache,
autoscalingContext.CloudProvider,
autoscalingContext.ListerRegistry,
daemonsets,
autoscalingContext.PredicateChecker,
a.ignoredTaints,
autoscalingContext.ScaleUpTemplateFromCloudProvider,
)
if autoscalerError != nil {
klog.Errorf("Failed to get node infos for groups: %v", autoscalerError)
return autoscalerError.AddPrefix("failed to build node infos for node groups: ")
6 changes: 3 additions & 3 deletions cluster-autoscaler/core/utils/utils.go
@@ -42,7 +42,7 @@ import (
func GetNodeInfosForGroups(nodes []*apiv1.Node, nodeInfoCache map[string]*schedulerframework.NodeInfo, cloudProvider cloudprovider.CloudProvider, listers kube_util.ListerRegistry,
// TODO(mwielgus): This returns map keyed by url, while most code (including scheduler) uses node.Name for a key.
// TODO(mwielgus): Review error policy - sometimes we may continue with partial errors.
daemonsets []*appsv1.DaemonSet, predicateChecker simulator.PredicateChecker, ignoredTaints taints.TaintKeySet) (map[string]*schedulerframework.NodeInfo, errors.AutoscalerError) {
daemonsets []*appsv1.DaemonSet, predicateChecker simulator.PredicateChecker, ignoredTaints taints.TaintKeySet, useTemplatesOnly bool) (map[string]*schedulerframework.NodeInfo, errors.AutoscalerError) {
result := make(map[string]*schedulerframework.NodeInfo)
seenGroups := make(map[string]bool)

@@ -95,12 +95,12 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, nodeInfoCache map[string]*schedu
for _, nodeGroup := range cloudProvider.NodeGroups() {
id := nodeGroup.Id()
seenGroups[id] = true
if _, found := result[id]; found {
if _, found := result[id]; found && !useTemplatesOnly {
continue
}

// No good template, check cache of previously running nodes.
if nodeInfoCache != nil {
if nodeInfoCache != nil && !useTemplatesOnly {
if nodeInfo, found := nodeInfoCache[id]; found {
if nodeInfoCopy, err := deepCopyNodeInfo(nodeInfo); err == nil {
result[id] = nodeInfoCopy
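
Condensing the change above: with useTemplatesOnly set, the result built from real-world nodes and the node-info cache are both bypassed, so every node group falls through to the cloud provider template. A hand-written sketch of that per-group selection order follows; the names and helpers are illustrative, not the actual function body.

package sketch

import "fmt"

// NodeInfo stands in for schedulerframework.NodeInfo in this sketch.
type NodeInfo struct{ Name string }

// nodeInfoForGroup mirrors the selection order implied by the diff: prefer a
// real-world node, then the cache, then the provider template — unless
// useTemplatesOnly is set, in which case the template is used unconditionally.
func nodeInfoForGroup(id string, useTemplatesOnly bool,
	fromRealNodes, cache map[string]*NodeInfo,
	fromTemplate func(id string) (*NodeInfo, error),
) (*NodeInfo, error) {
	if !useTemplatesOnly {
		if info, ok := fromRealNodes[id]; ok {
			return info, nil
		}
		if info, ok := cache[id]; ok {
			return info, nil
		}
	}
	info, err := fromTemplate(id)
	if err != nil {
		return nil, fmt.Errorf("no usable template for group %q: %v", id, err)
	}
	return info, nil
}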
39 changes: 34 additions & 5 deletions cluster-autoscaler/core/utils/utils_test.go
@@ -70,7 +70,7 @@ func TestGetNodeInfosForGroups(t *testing.T) {
assert.NoError(t, err)

res, err := GetNodeInfosForGroups([]*apiv1.Node{unready4, unready3, ready2, ready1}, nil,
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil)
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil, false)
assert.NoError(t, err)
assert.Equal(t, 4, len(res))
info, found := res["ng1"]
@@ -88,9 +88,38 @@

// Test for a node group without nodes and TemplateNodeInfo not implemented by the cloud provider
res, err = GetNodeInfosForGroups([]*apiv1.Node{}, nil, provider2, registry,
[]*appsv1.DaemonSet{}, predicateChecker, nil)
[]*appsv1.DaemonSet{}, predicateChecker, nil, false)
assert.NoError(t, err)
assert.Equal(t, 0, len(res))

// Test exclusive usage of nodeInfo from the cloud provider's node group templates
realNode := BuildTestNode("n1", 1000, 1000)
SetNodeReadyState(realNode, true, time.Now())
templateNode := BuildTestNode("tn", 5000, 5000)
tni1 := schedulerframework.NewNodeInfo()
tni1.SetNode(templateNode)

provider3 := testprovider.NewTestAutoprovisioningCloudProvider(
nil, nil, nil, nil, nil,
map[string]*schedulerframework.NodeInfo{"ng1": tni1})
provider3.AddNodeGroup("ng1", 1, 10, 1)
provider3.AddNode("ng1", realNode)

res, err = GetNodeInfosForGroups([]*apiv1.Node{realNode}, nil, provider3, registry,
[]*appsv1.DaemonSet{}, predicateChecker, nil, true) // use ASG template
assert.NoError(t, err)
assert.Equal(t, 1, len(res))
info, found = res["ng1"]
assert.True(t, found)
assertEqualNodeCapacities(t, templateNode, info.Node())

res, err = GetNodeInfosForGroups([]*apiv1.Node{realNode}, nil, provider3, registry,
[]*appsv1.DaemonSet{}, predicateChecker, nil, false) // use real-world node
assert.NoError(t, err)
assert.Equal(t, 1, len(res))
info, found = res["ng1"]
assert.True(t, found)
assertEqualNodeCapacities(t, realNode, info.Node())
}

func TestGetNodeInfosForGroupsCache(t *testing.T) {
@@ -142,7 +171,7 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) {

// Fill cache
res, err := GetNodeInfosForGroups([]*apiv1.Node{unready4, unready3, ready2, ready1}, nodeInfoCache,
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil)
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil, false)
assert.NoError(t, err)
// Check results
assert.Equal(t, 4, len(res))
@@ -177,7 +206,7 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) {

// Check cache with all nodes removed
res, err = GetNodeInfosForGroups([]*apiv1.Node{}, nodeInfoCache,
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil)
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil, false)
assert.NoError(t, err)
// Check results
assert.Equal(t, 2, len(res))
@@ -202,7 +231,7 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) {
nodeInfoCache = map[string]*schedulerframework.NodeInfo{"ng4": infoNg4Node6}
// Check if cache was used
res, err = GetNodeInfosForGroups([]*apiv1.Node{ready1, ready2}, nodeInfoCache,
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil)
provider1, registry, []*appsv1.DaemonSet{}, predicateChecker, nil, false)
assert.NoError(t, err)
assert.Equal(t, 2, len(res))
info, found = res["ng2"]
2 changes: 2 additions & 0 deletions cluster-autoscaler/main.go
@@ -174,6 +174,7 @@ var (
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
clusterAPICloudConfigAuthoritative = flag.Bool("clusterapi-cloud-config-authoritative", false, "Treat the cloud-config flag authoritatively (do not fallback to using kubeconfig flag). ClusterAPI only")
scaleUpTemplateFromCloudProvider = flag.Bool("scale-up-from-cloud-provider-template", false, "Build node templates from the cloud provider's node groups rather than from real-world nodes. WARNING: this isn't supported by all cloud providers, and can lead to wrong autoscaling decisions (erroneous capacity evaluations causing infinite upscales, or pods left pending).")
)

func createAutoscalingOptions() config.AutoscalingOptions {
@@ -243,6 +244,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
ClusterAPICloudConfigAuthoritative: *clusterAPICloudConfigAuthoritative,
ScaleUpTemplateFromCloudProvider: *scaleUpTemplateFromCloudProvider,
}
}

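
Given the warning that not every cloud provider implements node group templates, one way to sanity-check a provider before turning the flag on could look like the sketch below. It assumes the NodeGroup interface's TemplateNodeInfo method and is purely illustrative, not part of this change.

package check

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

// groupsWithoutTemplates lists node groups whose provider cannot build a
// template NodeInfo; illustrative helper, not part of this commit.
func groupsWithoutTemplates(provider cloudprovider.CloudProvider) []string {
	var missing []string
	for _, ng := range provider.NodeGroups() {
		if _, err := ng.TemplateNodeInfo(); err != nil {
			// Providers commonly report unsupported templates with an error
			// (e.g. cloudprovider.ErrNotImplemented) for such groups.
			missing = append(missing, ng.Id())
		}
	}
	return missing
}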