Skip to content

Commit

Permalink
Allow simulator to persist changes in cluster snapshot
Browse files Browse the repository at this point in the history
  • Loading branch information
x13n committed Sep 15, 2022
1 parent 6419abf commit 2854870
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 15 deletions.
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/scaledown/legacy/legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type ScaleDown struct {
// NewScaleDown builds new ScaleDown object.
func NewScaleDown(context *context.AutoscalingContext, processors *processors.AutoscalingProcessors, clusterStateRegistry *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker) *ScaleDown {
usageTracker := simulator.NewUsageTracker()
removalSimulator := simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, context.PredicateChecker, usageTracker)
removalSimulator := simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, context.PredicateChecker, usageTracker, false)
unremovableNodes := unremovable.NewNodes()
return &ScaleDown{
context: context,
Expand Down
32 changes: 23 additions & 9 deletions cluster-autoscaler/simulator/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,17 @@ type RemovalSimulator struct {
clusterSnapshot ClusterSnapshot
predicateChecker PredicateChecker
usageTracker *UsageTracker
canPersist bool
}

// NewRemovalSimulator returns a new RemovalSimulator.
func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot ClusterSnapshot, predicateChecker PredicateChecker, usageTracker *UsageTracker) *RemovalSimulator {
func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot ClusterSnapshot, predicateChecker PredicateChecker, usageTracker *UsageTracker, persistSuccessfulSimulations bool) *RemovalSimulator {
return &RemovalSimulator{
listers: listers,
clusterSnapshot: clusterSnapshot,
predicateChecker: predicateChecker,
usageTracker: usageTracker,
canPersist: persistSuccessfulSimulations,
}
}

Expand Down Expand Up @@ -174,7 +176,9 @@ func (r *RemovalSimulator) CheckNodeRemoval(
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: UnexpectedError}
}

err = r.findPlaceFor(nodeName, podsToRemove, destinationMap, oldHints, newHints, timestamp)
err = r.withForkedSnapshot(func() error {
return r.findPlaceFor(nodeName, podsToRemove, destinationMap, oldHints, newHints, timestamp)
})
if err != nil {
klog.V(2).Infof("node %s is not suitable for removal: %v", nodeName, err)
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: NoPlaceToMovePods}
Expand Down Expand Up @@ -205,19 +209,29 @@ func (r *RemovalSimulator) FindEmptyNodesToRemove(candidates []string, timestamp
return result
}

func (r *RemovalSimulator) findPlaceFor(removedNode string, pods []*apiv1.Pod, nodes map[string]bool,
oldHints map[string]string, newHints map[string]string, timestamp time.Time) error {

if err := r.clusterSnapshot.Fork(); err != nil {
func (r *RemovalSimulator) withForkedSnapshot(f func() error) (err error) {
if err = r.clusterSnapshot.Fork(); err != nil {
return err
}
defer func() {
err := r.clusterSnapshot.Revert()
if err != nil {
klog.Fatalf("Got error when calling ClusterSnapshot.Revert(); %v", err)
if err == nil && r.canPersist {
cleanupErr := r.clusterSnapshot.Commit()
if cleanupErr != nil {
klog.Fatalf("Got error when calling ClusterSnapshot.Commit(); %v", cleanupErr)
}
} else {
cleanupErr := r.clusterSnapshot.Revert()
if cleanupErr != nil {
klog.Fatalf("Got error when calling ClusterSnapshot.Revert(); %v", cleanupErr)
}
}
}()
err = f()
return err
}

func (r *RemovalSimulator) findPlaceFor(removedNode string, pods []*apiv1.Pod, nodes map[string]bool,
oldHints map[string]string, newHints map[string]string, timestamp time.Time) error {
podKey := func(pod *apiv1.Pod) string {
return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
}
Expand Down
10 changes: 5 additions & 5 deletions cluster-autoscaler/simulator/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestFindPlaceAllOk(t *testing.T) {
[]*apiv1.Node{node1, node2},
[]*apiv1.Pod{pod1})

err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker()).findPlaceFor(
err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker(), false).findPlaceFor(
"x",
[]*apiv1.Pod{new1, new2},
destinations,
Expand Down Expand Up @@ -96,7 +96,7 @@ func TestFindPlaceAllBas(t *testing.T) {
[]*apiv1.Node{node1, node2},
[]*apiv1.Pod{pod1})

err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker()).findPlaceFor(
err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker(), false).findPlaceFor(
"nbad",
[]*apiv1.Pod{new1, new2, new3},
destinations,
Expand Down Expand Up @@ -129,7 +129,7 @@ func TestFindNone(t *testing.T) {
[]*apiv1.Node{node1, node2},
[]*apiv1.Pod{pod1})

err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker()).findPlaceFor(
err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker(), false).findPlaceFor(
"x",
[]*apiv1.Pod{},
destinations,
Expand Down Expand Up @@ -162,7 +162,7 @@ func TestFindEmptyNodes(t *testing.T) {
clusterSnapshot := NewBasicClusterSnapshot()
InitializeClusterSnapshotOrDie(t, clusterSnapshot, []*apiv1.Node{nodes[0], nodes[1], nodes[2], nodes[3]}, []*apiv1.Pod{pod1, pod2})
testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC)
r := NewRemovalSimulator(nil, clusterSnapshot, nil, nil)
r := NewRemovalSimulator(nil, clusterSnapshot, nil, nil, false)
emptyNodes := r.FindEmptyNodesToRemove(nodeNames, testTime)
assert.Equal(t, []string{nodeNames[0], nodeNames[2], nodeNames[3]}, emptyNodes)
}
Expand Down Expand Up @@ -309,7 +309,7 @@ func TestFindNodesToRemove(t *testing.T) {
destinations = append(destinations, node.Name)
}
InitializeClusterSnapshotOrDie(t, clusterSnapshot, test.allNodes, test.pods)
r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, tracker)
r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, tracker, false)
toRemove, unremovable, _, err := r.FindNodesToRemove(
test.candidates, destinations, map[string]string{},
time.Now(), []*policyv1.PodDisruptionBudget{})
Expand Down

0 comments on commit 2854870

Please sign in to comment.