From 192009a272da4731305e051a3b2684e6244d1478 Mon Sep 17 00:00:00 2001 From: Zach Aller Date: Tue, 2 Jan 2024 09:59:54 -0600 Subject: [PATCH] fix: updates to replicas and pod template at the same time cause rollout to get stuck (#3272) * fix: fix the rollout getting stuck when pod/replicas are changed together under a canary strategy. Signed-off-by: Liming Liu * add one unit test case for empty canary service. Signed-off-by: Liming Liu * use rollout status to get the replicaset hash instead of service Signed-off-by: Zach Aller --------- Signed-off-by: Liming Liu Signed-off-by: Zach Aller Co-authored-by: Liming Liu --- rollout/canary.go | 7 +++++++ rollout/canary_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/rollout/canary.go b/rollout/canary.go index b443db507e..f37e03bab1 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -435,6 +435,13 @@ func (c *rolloutContext) reconcileCanaryReplicaSets() (bool, error) { return true, nil } + // If we have updated both the replica count and the pod template hash, c.newRS will be nil; we still want to reconcile the newRS, so we look at the + // rollout status to get the newRS and reconcile it. 
+ if c.newRS == nil && c.rollout.Status.CurrentPodHash != c.rollout.Status.StableRS { + rs, _ := replicasetutil.GetReplicaSetByTemplateHash(c.allRSs, c.rollout.Status.CurrentPodHash) + c.newRS = rs + } + scaledNewRS, err := c.reconcileNewReplicaSet() if err != nil { return false, err diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 6fc3dda93c..d92cfd2c17 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -1,6 +1,7 @@ package rollout import ( + "context" "encoding/json" "fmt" "strconv" @@ -2009,3 +2010,46 @@ func TestIsDynamicallyRollingBackToStable(t *testing.T) { }) } } + +func TestCanaryReplicaAndSpecChangedTogether(t *testing.T) { + f := newFixture(t) + defer f.Close() + + originReplicas := 3 + r1 := newCanaryRollout("foo", originReplicas, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(0)) + canarySVCName := "canary" + stableSVCName := "stable" + r1.Spec.Strategy.Canary.CanaryService = canarySVCName + r1.Spec.Strategy.Canary.StableService = stableSVCName + + stableRS := newReplicaSetWithStatus(r1, originReplicas, originReplicas) + stableSVC := newService(stableSVCName, 80, + map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, r1) + + r2 := bumpVersion(r1) + canaryRS := newReplicaSetWithStatus(r2, originReplicas, originReplicas) + canarySVC := newService(canarySVCName, 80, + map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, r2) + + f.replicaSetLister = append(f.replicaSetLister, canaryRS, stableRS) + f.serviceLister = append(f.serviceLister, canarySVC, stableSVC) + + r3 := bumpVersion(r2) + r3.Spec.Replicas = pointer.Int32(int32(originReplicas) + 5) + r3.Status.StableRS = stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + r3.Status.CurrentPodHash = canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + + f.rolloutLister = append(f.rolloutLister, r3) + f.kubeobjects = append(f.kubeobjects, 
canaryRS, stableRS, canarySVC, stableSVC) + f.objects = append(f.objects, r3) + + ctrl, _, _ := f.newController(noResyncPeriodFunc) + roCtx, err := ctrl.newRolloutContext(r3) + assert.NoError(t, err) + err = roCtx.reconcile() + assert.NoError(t, err) + updated, err := f.kubeclient.AppsV1().ReplicaSets(r3.Namespace).Get(context.Background(), canaryRS.Name, metav1.GetOptions{}) + assert.NoError(t, err) + // check the canary one is updated + assert.NotEqual(t, originReplicas, int(*updated.Spec.Replicas)) +}