From abc80295cd648d3db77cbbf78b632ab221f05da3 Mon Sep 17 00:00:00 2001
From: Hui Kang
Date: Wed, 12 Jan 2022 22:15:42 -0500
Subject: [PATCH 1/2] fix: continue the update process in the middle of an
 update if spec.replicas is 0

- if canary.spec.replicas == 0, as in the canary steps, the update continues

Signed-off-by: Hui Kang
---
 rollout/canary_test.go              | 2 +-
 utils/replicaset/replicaset.go      | 2 +-
 utils/replicaset/replicaset_test.go | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/rollout/canary_test.go b/rollout/canary_test.go
index 6ec62ba50d..e885c4c76e 100644
--- a/rollout/canary_test.go
+++ b/rollout/canary_test.go
@@ -1184,7 +1184,7 @@ func TestCanarySVCSelectors(t *testing.T) {
 		shouldTargetNewRS bool
 	}{
-		{0, 0, false},
+		{0, 0, true},
 		{2, 0, false},
 		{2, 1, false},
 		{2, 2, true},
diff --git a/utils/replicaset/replicaset.go b/utils/replicaset/replicaset.go
index ce72d90e56..aed35b8e58 100644
--- a/utils/replicaset/replicaset.go
+++ b/utils/replicaset/replicaset.go
@@ -630,5 +630,5 @@ func IsReplicaSetReady(rs *appsv1.ReplicaSet) bool {
 	}
 	replicas := rs.Spec.Replicas
 	readyReplicas := rs.Status.ReadyReplicas
-	return replicas != nil && *replicas != 0 && readyReplicas != 0 && *replicas <= readyReplicas
+	return (replicas != nil && *replicas != 0 && readyReplicas != 0 && *replicas <= readyReplicas) || (replicas != nil && *replicas == 0)
 }
diff --git a/utils/replicaset/replicaset_test.go b/utils/replicaset/replicaset_test.go
index 89c1933d9f..348aa792de 100644
--- a/utils/replicaset/replicaset_test.go
+++ b/utils/replicaset/replicaset_test.go
@@ -1295,7 +1295,6 @@ func TestIsReplicaSetReady(t *testing.T) {
 				ReadyReplicas: 0,
 			},
 		}
-		// NOTE: currently consider scaled down replicas as not ready
-		assert.False(t, IsReplicaSetReady(&rs))
+		assert.True(t, IsReplicaSetReady(&rs))
 	}
 }

From 4d1e68d48a45aa430ac7a8fe69dca7cabc355a83 Mon Sep 17 00:00:00 2001
From: Hui Kang
Date: Sat, 15 Jan 2022 00:24:09 -0500
Subject: [PATCH 2/2] Use traffic weight references to determine if it is safe
 to scale down an intermediate ReplicaSet

Signed-off-by: Hui Kang
---
 rollout/canary.go                           | 31 +++++---
 rollout/canary_test.go                      | 12 +++-
 .../istio-host-split-update-in-middle.yaml  | 86 ++++++++++++++++++++
 test/e2e/istio_test.go                      | 41 ++++++---
 utils/replicaset/replicaset.go              |  2 +-
 utils/replicaset/replicaset_test.go         |  3 +-
 6 files changed, 149 insertions(+), 26 deletions(-)
 create mode 100644 test/e2e/istio/istio-host-split-update-in-middle.yaml

diff --git a/rollout/canary.go b/rollout/canary.go
index 3d04707cb8..4aca373efe 100644
--- a/rollout/canary.go
+++ b/rollout/canary.go
@@ -216,15 +216,11 @@ func (c *rolloutContext) scaleDownOldReplicaSetsForCanary(oldRSs []*appsv1.Repli
 			// and doesn't yet have scale down deadline. This happens when a user changes their
 			// mind in the middle of an V1 -> V2 update, and then applies a V3. We are deciding
 			// what to do with the defunct, intermediate V2 ReplicaSet right now.
-			if replicasetutil.IsReplicaSetReady(c.newRS) && replicasetutil.IsReplicaSetReady(c.stableRS) {
-				// If the both new and old RS are available, we can infer that it is safe to
-				// scale down this ReplicaSet, since traffic should have shifted away from this RS.
-				// TODO: instead of checking availability of canary/stable, a better way to determine
-				// if it is safe to scale this down, is to check if traffic is directed to the RS.
-				// In other words, we can check c.rollout.Status.Canary.Weights to see if this
-				// ReplicaSet hash is still referenced, and scale it down otherwise.
+			if !c.replicaSetReferencedByCanaryTraffic(targetRS) {
+				// It is safe to scale down the intermediate RS, since no traffic is directed to it.
 				c.log.Infof("scaling down intermediate RS '%s'", targetRS.Name)
 			} else {
-				// The current and stable ReplicaSets have not reached readiness. This implies
-				// we might not have shifted traffic away from this ReplicaSet so we need to
-				// keep this scaled up.
+				// The traffic weights still reference this ReplicaSet, meaning traffic
+				// has not fully shifted away from it, so we need to keep it
+				// scaled up.
+				c.log.Infof("not scaling down intermediate RS '%s': traffic weights still reference it", targetRS.Name)
@@ -249,6 +245,21 @@ func (c *rolloutContext) scaleDownOldReplicaSetsForCanary(oldRSs []*appsv1.Repli
 	return totalScaledDown, nil
 }
 
+func (c *rolloutContext) replicaSetReferencedByCanaryTraffic(rs *appsv1.ReplicaSet) bool {
+	rsPodHash := replicasetutil.GetPodTemplateHash(rs)
+	ro := c.rollout
+
+	if ro.Status.Canary.Weights == nil {
+		return false
+	}
+
+	if ro.Status.Canary.Weights.Canary.PodTemplateHash == rsPodHash || ro.Status.Canary.Weights.Stable.PodTemplateHash == rsPodHash {
+		return true
+	}
+
+	return false
+}
+
 // canProceedWithScaleDownAnnotation returns whether or not it is safe to proceed with annotating
 // old replicasets with the scale-down-deadline in the traffic-routed canary strategy.
 // This method only matters with ALB canary + the target group verification feature.
diff --git a/rollout/canary_test.go b/rollout/canary_test.go
index e885c4c76e..754a916660 100644
--- a/rollout/canary_test.go
+++ b/rollout/canary_test.go
@@ -24,6 +24,7 @@ import (
 	"github.com/argoproj/argo-rollouts/utils/conditions"
 	logutil "github.com/argoproj/argo-rollouts/utils/log"
 	"github.com/argoproj/argo-rollouts/utils/record"
+	replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
 	rolloututil "github.com/argoproj/argo-rollouts/utils/rollout"
 	timeutil "github.com/argoproj/argo-rollouts/utils/time"
 )
@@ -738,6 +739,13 @@ func TestCanaryDontScaleDownOldRsDuringInterruptedUpdate(t *testing.T) {
 	rs1 := newReplicaSetWithStatus(r1, 5, 5)
 	rs2 := newReplicaSetWithStatus(r2, 5, 5)
 	rs3 := newReplicaSetWithStatus(r3, 5, 0)
+	// Record rs2's pod template hash in the rollout's traffic weights, simulating
+	// canary traffic that is still directed at the intermediate ReplicaSet.
+	r3.Status.Canary.Weights = &v1alpha1.TrafficWeights{
+		Canary: v1alpha1.WeightDestination{
+			PodTemplateHash: replicasetutil.GetPodTemplateHash(rs2),
+		},
+	}
 
 	f.objects = append(f.objects, r3)
 	f.kubeobjects = append(f.kubeobjects, rs1, rs2, rs3, canarySvc, stableSvc)
@@ -751,7 +759,7 @@ func TestCanaryDontScaleDownOldRsDuringInterruptedUpdate(t *testing.T) {
 // TestCanaryScaleDownOldRsDuringInterruptedUpdate tests that we proceed with scale down of an
 // intermediate V2 ReplicaSet when applying a V3 spec in the middle of updating a traffic routed
 // canary going from V1 -> V2 (i.e. after we have shifted traffic away from V2). This test is the
-// same as TestCanaryDontScaleDownOldRsDuringUpdate but rs3 is fully available
+// same as TestCanaryDontScaleDownOldRsDuringInterruptedUpdate but rs3 is fully available
 func TestCanaryScaleDownOldRsDuringInterruptedUpdate(t *testing.T) {
 	f := newFixture(t)
 	defer f.Close()
@@ -1184,7 +1192,7 @@ func TestCanarySVCSelectors(t *testing.T) {
 		shouldTargetNewRS bool
 	}{
-		{0, 0, true},
+		{0, 0, false},
 		{2, 0, false},
 		{2, 1, false},
 		{2, 2, true},
diff --git a/test/e2e/istio/istio-host-split-update-in-middle.yaml b/test/e2e/istio/istio-host-split-update-in-middle.yaml
new file mode 100644
index 0000000000..f1d6821b18
--- /dev/null
+++ b/test/e2e/istio/istio-host-split-update-in-middle.yaml
@@ -0,0 +1,86 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: istio-host-split-canary
+spec:
+  ports:
+  - port: 80
+    targetPort: http
+    protocol: TCP
+    name: http
+  selector:
+    app: istio-host-split
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: istio-host-split-stable
+spec:
+  ports:
+  - port: 80
+    targetPort: http
+    protocol: TCP
+    name: http
+  selector:
+    app: istio-host-split
+
+---
+apiVersion: networking.istio.io/v1alpha3
+kind: VirtualService
+metadata:
+  name: istio-host-split-vsvc
+spec:
+  hosts:
+  - istio-host-split
+  http:
+  - name: primary
+    route:
+    - destination:
+        host: istio-host-split-stable
+      weight: 100
+    - destination:
+        host: istio-host-split-canary
+      weight: 0
+
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Rollout
+metadata:
+  name: istio-host-split
+spec:
+  replicas: 2
+  strategy:
+    canary:
+      canaryService: istio-host-split-canary
+      stableService: istio-host-split-stable
+      trafficRouting:
+        istio:
+          virtualService:
+            name: istio-host-split-vsvc
+            routes:
+            - primary
+      steps:
+      - setWeight: 0
+      - setCanaryScale:
+          replicas: 1
+      - pause: {}
+  selector:
+    matchLabels:
+      app: istio-host-split
+  template:
+    metadata:
+      labels:
+        app: istio-host-split
+    spec:
+      containers:
+      - name: istio-host-split
+        image: nginx:1.19-alpine
+        ports:
+        - name: http
+          containerPort: 80
+          protocol: TCP
+        resources:
+          requests:
+            memory: 16Mi
+            cpu: 5m
diff --git a/test/e2e/istio_test.go b/test/e2e/istio_test.go
index 47710b5110..478d7eca05 100644
--- a/test/e2e/istio_test.go
+++ b/test/e2e/istio_test.go
@@ -257,6 +257,25 @@ func (s *IstioSuite) TestIstioAbortUpdate() {
 		ExpectRevisionPodCount("2", 1)
 }
 
+func (s *IstioSuite) TestIstioUpdateInMiddleZeroCanaryReplicas() {
+	s.Given().
+		RolloutObjects("@istio/istio-host-split-update-in-middle.yaml").
+		When().
+		ApplyManifests().
+		WaitForRolloutStatus("Healthy").
+		Then().
+		When().
+		UpdateSpec().
+		WaitForRolloutStatus("Paused").
+		Then().
+		ExpectRevisionPodCount("2", 1).
+		When().
+		UpdateSpec().
+		WaitForRolloutStatus("Paused").
+		Then().
+		ExpectRevisionPodCount("3", 1)
+}
+
 func (s *IstioSuite) TestIstioAbortUpdateDeleteAllCanaryPods() {
 	s.Given().
 		RolloutObjects("@istio/istio-rollout-abort-delete-all-canary-pods.yaml").
@@ -361,17 +380,17 @@ func (s *IstioSuite) TestIstioSubsetSplitExperimentStep() {
 		WaitForRolloutStatus("Healthy").
 		Then().
Assert(func(t *fixtures.Then) { - vsvc := t.GetVirtualService() - assert.Equal(s.T(), int64(100), vsvc.Spec.HTTP[0].Route[0].Weight) // stable - assert.Equal(s.T(), int64(0), vsvc.Spec.HTTP[0].Route[1].Weight) // canary + vsvc := t.GetVirtualService() + assert.Equal(s.T(), int64(100), vsvc.Spec.HTTP[0].Route[0].Weight) // stable + assert.Equal(s.T(), int64(0), vsvc.Spec.HTTP[0].Route[1].Weight) // canary - rs1 := t.GetReplicaSetByRevision("1") - destrule := t.GetDestinationRule() - assert.Len(s.T(), destrule.Spec.Subsets, 2) - assert.Equal(s.T(), rs1.Spec.Template.Labels[v1alpha1.DefaultRolloutUniqueLabelKey], destrule.Spec.Subsets[0].Labels[v1alpha1.DefaultRolloutUniqueLabelKey]) // stable - assert.Equal(s.T(), rs1.Spec.Template.Labels[v1alpha1.DefaultRolloutUniqueLabelKey], destrule.Spec.Subsets[1].Labels[v1alpha1.DefaultRolloutUniqueLabelKey]) // canary + rs1 := t.GetReplicaSetByRevision("1") + destrule := t.GetDestinationRule() + assert.Len(s.T(), destrule.Spec.Subsets, 2) + assert.Equal(s.T(), rs1.Spec.Template.Labels[v1alpha1.DefaultRolloutUniqueLabelKey], destrule.Spec.Subsets[0].Labels[v1alpha1.DefaultRolloutUniqueLabelKey]) // stable + assert.Equal(s.T(), rs1.Spec.Template.Labels[v1alpha1.DefaultRolloutUniqueLabelKey], destrule.Spec.Subsets[1].Labels[v1alpha1.DefaultRolloutUniqueLabelKey]) // canary - }). + }). When(). UpdateSpec(). WaitForRolloutCanaryStepIndex(1). @@ -401,7 +420,7 @@ func (s *IstioSuite) TestIstioSubsetSplitExperimentStep() { Assert(func(t *fixtures.Then) { vsvc := t.GetVirtualService() assert.Equal(s.T(), int64(100), vsvc.Spec.HTTP[0].Route[0].Weight) // stable - assert.Equal(s.T(), int64(0), vsvc.Spec.HTTP[0].Route[1].Weight) // canary + assert.Equal(s.T(), int64(0), vsvc.Spec.HTTP[0].Route[1].Weight) // canary destrule := t.GetDestinationRule() rs2 := t.GetReplicaSetByRevision("2") @@ -413,5 +432,3 @@ func (s *IstioSuite) TestIstioSubsetSplitExperimentStep() { s.TearDownSuite() } - - diff --git a/utils/replicaset/replicaset.go b/utils/replicaset/replicaset.go index aed35b8e58..ce72d90e56 100644 --- a/utils/replicaset/replicaset.go +++ b/utils/replicaset/replicaset.go @@ -630,5 +630,5 @@ func IsReplicaSetReady(rs *appsv1.ReplicaSet) bool { } replicas := rs.Spec.Replicas readyReplicas := rs.Status.ReadyReplicas - return (replicas != nil && *replicas != 0 && readyReplicas != 0 && *replicas <= readyReplicas) || (replicas != nil && *replicas == 0) + return replicas != nil && *replicas != 0 && readyReplicas != 0 && *replicas <= readyReplicas } diff --git a/utils/replicaset/replicaset_test.go b/utils/replicaset/replicaset_test.go index 348aa792de..89c1933d9f 100644 --- a/utils/replicaset/replicaset_test.go +++ b/utils/replicaset/replicaset_test.go @@ -1295,6 +1295,7 @@ func TestIsReplicaSetReady(t *testing.T) { ReadyReplicas: 0, }, } - assert.True(t, IsReplicaSetReady(&rs)) + // NOTE: currently consider scaled down replicas as not ready + assert.False(t, IsReplicaSetReady(&rs)) } }
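
Reviewer note, not part of the patch series: the sketch below is a minimal, self-contained Go illustration of the scale-down decision introduced in patch 2/2. WeightDestination and TrafficWeights are simplified stand-ins for the v1alpha1 types, and referencedByTraffic mirrors replicaSetReferencedByCanaryTraffic.

    package main

    import "fmt"

    // Simplified stand-ins for the rollout status types referenced by the patch.
    type WeightDestination struct {
        PodTemplateHash string
    }

    type TrafficWeights struct {
        Canary WeightDestination
        Stable WeightDestination
    }

    // referencedByTraffic mirrors replicaSetReferencedByCanaryTraffic: a ReplicaSet
    // is still in use if either weight destination records its pod template hash.
    // nil weights means nothing has been recorded yet, so the ReplicaSet is treated
    // as unreferenced (and therefore safe to scale down), exactly as in the patch.
    func referencedByTraffic(weights *TrafficWeights, rsPodHash string) bool {
        if weights == nil {
            return false
        }
        return weights.Canary.PodTemplateHash == rsPodHash ||
            weights.Stable.PodTemplateHash == rsPodHash
    }

    func main() {
        weights := &TrafficWeights{
            Canary: WeightDestination{PodTemplateHash: "hash-v2"},
            Stable: WeightDestination{PodTemplateHash: "hash-v1"},
        }
        fmt.Println(referencedByTraffic(weights, "hash-v2")) // true: keep the intermediate RS scaled up
        fmt.Println(referencedByTraffic(weights, "hash-v3")) // false: safe to scale it down
    }

This is also why the readiness-based heuristic could misfire: readiness of the new and stable ReplicaSets says nothing about whether the mesh still routes traffic to the intermediate one, whereas Status.Canary.Weights records that directly.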
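Reviewer note, not part of the patch series: patch 1/2 made IsReplicaSetReady treat a ReplicaSet scaled to zero as ready, and patch 2/2 reverts that, since the scale-down decision now consults the traffic weights instead of readiness. A standalone sketch of the restored predicate (simplified signature, not the real appsv1-based one):

    package main

    import "fmt"

    // isReady mirrors the restored IsReplicaSetReady logic: spec.replicas must be
    // set and non-zero, and every desired replica must be ready. A ReplicaSet
    // scaled to zero is therefore not considered ready.
    func isReady(specReplicas *int32, readyReplicas int32) bool {
        return specReplicas != nil && *specReplicas != 0 &&
            readyReplicas != 0 && *specReplicas <= readyReplicas
    }

    func main() {
        zero, two := int32(0), int32(2)
        fmt.Println(isReady(&zero, 0)) // false: scaled down, not ready
        fmt.Println(isReady(&two, 2))  // true: all desired replicas ready
        fmt.Println(isReady(&two, 1))  // false: a replica is still missing
        fmt.Println(isReady(nil, 0))   // false: replicas unset
    }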