From 28548705f88f96f5881ed8d51f051191c923786b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20K=C5=82obuszewski?= Date: Fri, 26 Aug 2022 10:03:12 +0200 Subject: [PATCH] Allow simulator to persist changes in cluster snapshot --- .../core/scaledown/legacy/legacy.go | 2 +- cluster-autoscaler/simulator/cluster.go | 32 +++++++++++++------ cluster-autoscaler/simulator/cluster_test.go | 10 +++--- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/cluster-autoscaler/core/scaledown/legacy/legacy.go b/cluster-autoscaler/core/scaledown/legacy/legacy.go index d6fa113bee29..82aa29bf16be 100644 --- a/cluster-autoscaler/core/scaledown/legacy/legacy.go +++ b/cluster-autoscaler/core/scaledown/legacy/legacy.go @@ -65,7 +65,7 @@ type ScaleDown struct { // NewScaleDown builds new ScaleDown object. func NewScaleDown(context *context.AutoscalingContext, processors *processors.AutoscalingProcessors, clusterStateRegistry *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker) *ScaleDown { usageTracker := simulator.NewUsageTracker() - removalSimulator := simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, context.PredicateChecker, usageTracker) + removalSimulator := simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, context.PredicateChecker, usageTracker, false) unremovableNodes := unremovable.NewNodes() return &ScaleDown{ context: context, diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index 278a4c39d23b..ab3d0f38c346 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -101,15 +101,17 @@ type RemovalSimulator struct { clusterSnapshot ClusterSnapshot predicateChecker PredicateChecker usageTracker *UsageTracker + canPersist bool } // NewRemovalSimulator returns a new RemovalSimulator. -func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot ClusterSnapshot, predicateChecker PredicateChecker, usageTracker *UsageTracker) *RemovalSimulator { +func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot ClusterSnapshot, predicateChecker PredicateChecker, usageTracker *UsageTracker, persistSuccessfulSimulations bool) *RemovalSimulator { return &RemovalSimulator{ listers: listers, clusterSnapshot: clusterSnapshot, predicateChecker: predicateChecker, usageTracker: usageTracker, + canPersist: persistSuccessfulSimulations, } } @@ -174,7 +176,9 @@ func (r *RemovalSimulator) CheckNodeRemoval( return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: UnexpectedError} } - err = r.findPlaceFor(nodeName, podsToRemove, destinationMap, oldHints, newHints, timestamp) + err = r.withForkedSnapshot(func() error { + return r.findPlaceFor(nodeName, podsToRemove, destinationMap, oldHints, newHints, timestamp) + }) if err != nil { klog.V(2).Infof("node %s is not suitable for removal: %v", nodeName, err) return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: NoPlaceToMovePods} @@ -205,19 +209,29 @@ func (r *RemovalSimulator) FindEmptyNodesToRemove(candidates []string, timestamp return result } -func (r *RemovalSimulator) findPlaceFor(removedNode string, pods []*apiv1.Pod, nodes map[string]bool, - oldHints map[string]string, newHints map[string]string, timestamp time.Time) error { - - if err := r.clusterSnapshot.Fork(); err != nil { +func (r *RemovalSimulator) withForkedSnapshot(f func() error) (err error) { + if err = r.clusterSnapshot.Fork(); err != nil { return err } defer func() { - err := r.clusterSnapshot.Revert() - if err != nil { - klog.Fatalf("Got error when calling ClusterSnapshot.Revert(); %v", err) + if err == nil && r.canPersist { + cleanupErr := r.clusterSnapshot.Commit() + if cleanupErr != nil { + klog.Fatalf("Got error when calling ClusterSnapshot.Commit(); %v", cleanupErr) + } + } else { + cleanupErr := r.clusterSnapshot.Revert() + if cleanupErr != nil { + klog.Fatalf("Got error when calling ClusterSnapshot.Revert(); %v", cleanupErr) + } } }() + err = f() + return err +} +func (r *RemovalSimulator) findPlaceFor(removedNode string, pods []*apiv1.Pod, nodes map[string]bool, + oldHints map[string]string, newHints map[string]string, timestamp time.Time) error { podKey := func(pod *apiv1.Pod) string { return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) } diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index f7e516073005..79057d068c01 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -58,7 +58,7 @@ func TestFindPlaceAllOk(t *testing.T) { []*apiv1.Node{node1, node2}, []*apiv1.Pod{pod1}) - err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker()).findPlaceFor( + err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker(), false).findPlaceFor( "x", []*apiv1.Pod{new1, new2}, destinations, @@ -96,7 +96,7 @@ func TestFindPlaceAllBas(t *testing.T) { []*apiv1.Node{node1, node2}, []*apiv1.Pod{pod1}) - err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker()).findPlaceFor( + err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker(), false).findPlaceFor( "nbad", []*apiv1.Pod{new1, new2, new3}, destinations, @@ -129,7 +129,7 @@ func TestFindNone(t *testing.T) { []*apiv1.Node{node1, node2}, []*apiv1.Pod{pod1}) - err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker()).findPlaceFor( + err = NewRemovalSimulator(nil, clusterSnapshot, predicateChecker, NewUsageTracker(), false).findPlaceFor( "x", []*apiv1.Pod{}, destinations, @@ -162,7 +162,7 @@ func TestFindEmptyNodes(t *testing.T) { clusterSnapshot := NewBasicClusterSnapshot() InitializeClusterSnapshotOrDie(t, clusterSnapshot, []*apiv1.Node{nodes[0], nodes[1], nodes[2], nodes[3]}, []*apiv1.Pod{pod1, pod2}) testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - r := NewRemovalSimulator(nil, clusterSnapshot, nil, nil) + r := NewRemovalSimulator(nil, clusterSnapshot, nil, nil, false) emptyNodes := r.FindEmptyNodesToRemove(nodeNames, testTime) assert.Equal(t, []string{nodeNames[0], nodeNames[2], nodeNames[3]}, emptyNodes) } @@ -309,7 +309,7 @@ func TestFindNodesToRemove(t *testing.T) { destinations = append(destinations, node.Name) } InitializeClusterSnapshotOrDie(t, clusterSnapshot, test.allNodes, test.pods) - r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, tracker) + r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, tracker, false) toRemove, unremovable, _, err := r.FindNodesToRemove( test.candidates, destinations, map[string]string{}, time.Now(), []*policyv1.PodDisruptionBudget{})