fix: continue update process in middle of update if spec.replicas is 0 #1764

Merged
25 changes: 18 additions & 7 deletions rollout/canary.go
@@ -216,15 +216,11 @@ func (c *rolloutContext) scaleDownOldReplicaSetsForCanary(oldRSs []*appsv1.Repli
 		// and doesn't yet have scale down deadline. This happens when a user changes their
 		// mind in the middle of a V1 -> V2 update, and then applies a V3. We are deciding
 		// what to do with the defunct, intermediate V2 ReplicaSet right now.
-		if replicasetutil.IsReplicaSetReady(c.newRS) && replicasetutil.IsReplicaSetReady(c.stableRS) {
-			// If both the new and old RS are available, we can infer that it is safe to
-			// scale down this ReplicaSet, since traffic should have shifted away from this RS.
-			// TODO: instead of checking availability of canary/stable, a better way to determine
-			// if it is safe to scale this down, is to check if traffic is directed to the RS.
-			// In other words, we can check c.rollout.Status.Canary.Weights to see if this
-			// ReplicaSet hash is still referenced, and scale it down otherwise.
+		if !c.replicaSetReferencedByCanaryTraffic(targetRS) {
+			// It is safe to scale the intermediate RS down if no traffic is directed to it.
 			c.log.Infof("scaling down intermediate RS '%s'", targetRS.Name)
 		} else {
+			c.log.Infof("not scaling down intermediate RS '%s': still referenced by traffic routing", targetRS.Name)
 			// The current and stable ReplicaSets have not reached readiness. This implies
 			// we might not have shifted traffic away from this ReplicaSet so we need to
 			// keep this scaled up.
@@ -249,6 +245,21 @@ func (c *rolloutContext) scaleDownOldReplicaSetsForCanary(oldRSs []*appsv1.Repli
 	return totalScaledDown, nil
 }
 
+func (c *rolloutContext) replicaSetReferencedByCanaryTraffic(rs *appsv1.ReplicaSet) bool {
+	rsPodHash := replicasetutil.GetPodTemplateHash(rs)
+	ro := c.rollout
+
+	if ro.Status.Canary.Weights == nil {
+		return false
+	}
+
+	if ro.Status.Canary.Weights.Canary.PodTemplateHash == rsPodHash || ro.Status.Canary.Weights.Stable.PodTemplateHash == rsPodHash {
+		return true
+	}
+
+	return false
+}
+
 // canProceedWithScaleDownAnnotation returns whether or not it is safe to proceed with annotating
 // old replicasets with the scale-down-deadline in the traffic-routed canary strategy.
 // This method only matters with ALB canary + the target group verification feature.
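The new helper is the core of the fix: instead of inferring safety from ReplicaSet readiness, it checks whether the rollout's recorded traffic weights still reference the ReplicaSet's pod-template hash. Below is a minimal, self-contained sketch of that check; the `WeightDestination` and `TrafficWeights` structs here are simplified stand-ins for the real v1alpha1 API types, which carry more fields:

```go
package main

import "fmt"

// Simplified stand-ins for the v1alpha1 API structs; illustration only.
type WeightDestination struct {
	PodTemplateHash string
}

type TrafficWeights struct {
	Canary WeightDestination
	Stable WeightDestination
}

// referencedByTraffic mirrors replicaSetReferencedByCanaryTraffic: a
// ReplicaSet is still "live" if its pod-template hash matches either side
// of the recorded weights. A nil weights status means nothing is known to
// reference the ReplicaSet, so it reports false.
func referencedByTraffic(weights *TrafficWeights, rsHash string) bool {
	if weights == nil {
		return false
	}
	return weights.Canary.PodTemplateHash == rsHash ||
		weights.Stable.PodTemplateHash == rsHash
}

func main() {
	w := &TrafficWeights{
		Canary: WeightDestination{PodTemplateHash: "hash-v2"},
		Stable: WeightDestination{PodTemplateHash: "hash-v1"},
	}
	for _, hash := range []string{"hash-v1", "hash-v2", "hash-v3"} {
		// Only hash-v3 is safe to scale down: neither weight references it.
		fmt.Printf("%s referenced: %v\n", hash, referencedByTraffic(w, hash))
	}
}
```

With this check, the defunct intermediate V2 ReplicaSet in a V1 -> V2 -> V3 sequence is scaled down as soon as the recorded weights stop pointing at it, regardless of whether the V3 ReplicaSet has become ready.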
8 changes: 7 additions & 1 deletion rollout/canary_test.go
@@ -24,6 +24,7 @@ import (
 	"github.com/argoproj/argo-rollouts/utils/conditions"
 	logutil "github.com/argoproj/argo-rollouts/utils/log"
 	"github.com/argoproj/argo-rollouts/utils/record"
+	replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
 	rolloututil "github.com/argoproj/argo-rollouts/utils/rollout"
 	timeutil "github.com/argoproj/argo-rollouts/utils/time"
 )
@@ -738,6 +739,11 @@ func TestCanaryDontScaleDownOldRsDuringInterruptedUpdate(t *testing.T) {
 	rs1 := newReplicaSetWithStatus(r1, 5, 5)
 	rs2 := newReplicaSetWithStatus(r2, 5, 5)
 	rs3 := newReplicaSetWithStatus(r3, 5, 0)
+	r3.Status.Canary.Weights = &v1alpha1.TrafficWeights{
+		Canary: v1alpha1.WeightDestination{
+			PodTemplateHash: replicasetutil.GetPodTemplateHash(rs2),
+		},
+	}
 
 	f.objects = append(f.objects, r3)
 	f.kubeobjects = append(f.kubeobjects, rs1, rs2, rs3, canarySvc, stableSvc)
@@ -751,7 +757,7 @@
 // TestCanaryScaleDownOldRsDuringInterruptedUpdate tests that we proceed with scale down of an
 // intermediate V2 ReplicaSet when applying a V3 spec in the middle of updating a traffic routed
 // canary going from V1 -> V2 (i.e. after we have shifted traffic away from V2). This test is the
-// same as TestCanaryDontScaleDownOldRsDuringUpdate but rs3 is fully available
+// same as TestCanaryDontScaleDownOldRsDuringInterruptedUpdate but rs3 is fully available
 func TestCanaryScaleDownOldRsDuringInterruptedUpdate(t *testing.T) {
 	f := newFixture(t)
 	defer f.Close()
86 changes: 86 additions & 0 deletions test/e2e/istio/istio-host-split-update-in-middle.yaml
@@ -0,0 +1,86 @@
apiVersion: v1
kind: Service
metadata:
  name: istio-host-split-canary
spec:
  ports:
  - port: 80
    targetPort: http
    protocol: TCP
    name: http
  selector:
    app: istio-host-split

---
apiVersion: v1
kind: Service
metadata:
  name: istio-host-split-stable
spec:
  ports:
  - port: 80
    targetPort: http
    protocol: TCP
    name: http
  selector:
    app: istio-host-split

---
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: istio-host-split-vsvc
spec:
  hosts:
  - istio-host-split
  http:
  - name: primary
    route:
    - destination:
        host: istio-host-split-stable
      weight: 100
    - destination:
        host: istio-host-split-canary
      weight: 0

---
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
  name: istio-host-split
spec:
  replicas: 2
  strategy:
    canary:
      canaryService: istio-host-split-canary
      stableService: istio-host-split-stable
      trafficRouting:
        istio:
          virtualService:
            name: istio-host-split-vsvc
            routes:
            - primary
      steps:
      - setWeight: 0
      - setCanaryScale:
          replicas: 1
      - pause: {}
  selector:
    matchLabels:
      app: istio-host-split
  template:
    metadata:
      labels:
        app: istio-host-split
    spec:
      containers:
      - name: istio-host-split
        image: nginx:1.19-alpine
        ports:
        - name: http
          containerPort: 80
          protocol: TCP
        resources:
          requests:
            memory: 16Mi
            cpu: 5m
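This manifest reproduces the stuck state described by the PR title: `setWeight: 0` keeps canary traffic at zero, `setCanaryScale: {replicas: 1}` pins the canary scale, and the indefinite `pause: {}` parks the update mid-flight, so a second spec change arrives while an intermediate ReplicaSet still exists. Under the old gate, scale-down waited for both the new and stable ReplicaSets to be ready, and a ReplicaSet whose `spec.replicas` is 0 never reports ready, so the update could stall. The sketch below illustrates that failure mode; `isReady` is an assumed stand-in for `replicasetutil.IsReplicaSetReady`, whose exact logic may differ:

```go
package main

import "fmt"

// rsState is a toy stand-in for a ReplicaSet's scale and availability.
type rsState struct {
	specReplicas      int32
	availableReplicas int32
}

// isReady assumes a zero-replica ReplicaSet never reports ready; the real
// check lives in replicasetutil.IsReplicaSetReady and may differ in detail.
func isReady(r rsState) bool {
	return r.specReplicas > 0 && r.availableReplicas >= r.specReplicas
}

func main() {
	stable := rsState{specReplicas: 2, availableReplicas: 2}
	canary := rsState{specReplicas: 0, availableReplicas: 0} // parked at setWeight: 0

	// Old gate: scale down the defunct intermediate RS only when both the
	// new and stable RS are ready. With a zero-replica canary this stays
	// false, so the rollout could never make progress.
	oldGate := isReady(canary) && isReady(stable)
	fmt.Println("old gate allows scale-down:", oldGate) // false -> stuck
}
```

The new traffic-reference gate sidesteps this: readiness of the replacement ReplicaSets no longer matters, only whether the recorded weights still point at the ReplicaSet being considered for scale-down.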
41 changes: 29 additions & 12 deletions test/e2e/istio_test.go
@@ -257,6 +257,25 @@ func (s *IstioSuite) TestIstioAbortUpdate() {
 		ExpectRevisionPodCount("2", 1)
 }
 
+func (s *IstioSuite) TestIstioUpdateInMiddleZeroCanaryReplicas() {
+	s.Given().
+		RolloutObjects("@istio/istio-host-split-update-in-middle.yaml").
+		When().
+		ApplyManifests().
+		WaitForRolloutStatus("Healthy").
+		Then().
+		When().
+		UpdateSpec().
+		WaitForRolloutStatus("Paused").
+		Then().
+		ExpectRevisionPodCount("2", 1).
+		When().
+		UpdateSpec().
+		WaitForRolloutStatus("Paused").
+		Then().
+		ExpectRevisionPodCount("3", 1)
+}
+
 func (s *IstioSuite) TestIstioAbortUpdateDeleteAllCanaryPods() {
 	s.Given().
 		RolloutObjects("@istio/istio-rollout-abort-delete-all-canary-pods.yaml").
@@ -361,17 +380,17 @@ func (s *IstioSuite) TestIstioSubsetSplitExperimentStep() {
 		WaitForRolloutStatus("Healthy").
 		Then().
 		Assert(func(t *fixtures.Then) {
 			vsvc := t.GetVirtualService()
 			assert.Equal(s.T(), int64(100), vsvc.Spec.HTTP[0].Route[0].Weight) // stable
 			assert.Equal(s.T(), int64(0), vsvc.Spec.HTTP[0].Route[1].Weight)   // canary
 
 			rs1 := t.GetReplicaSetByRevision("1")
 			destrule := t.GetDestinationRule()
 			assert.Len(s.T(), destrule.Spec.Subsets, 2)
 			assert.Equal(s.T(), rs1.Spec.Template.Labels[v1alpha1.DefaultRolloutUniqueLabelKey], destrule.Spec.Subsets[0].Labels[v1alpha1.DefaultRolloutUniqueLabelKey]) // stable
 			assert.Equal(s.T(), rs1.Spec.Template.Labels[v1alpha1.DefaultRolloutUniqueLabelKey], destrule.Spec.Subsets[1].Labels[v1alpha1.DefaultRolloutUniqueLabelKey]) // canary
 
 		}).
 		When().
 		UpdateSpec().
 		WaitForRolloutCanaryStepIndex(1).
@@ -401,7 +420,7 @@ func (s *IstioSuite) TestIstioSubsetSplitExperimentStep() {
 		Assert(func(t *fixtures.Then) {
 			vsvc := t.GetVirtualService()
 			assert.Equal(s.T(), int64(100), vsvc.Spec.HTTP[0].Route[0].Weight) // stable
 			assert.Equal(s.T(), int64(0), vsvc.Spec.HTTP[0].Route[1].Weight)   // canary
 
 			destrule := t.GetDestinationRule()
 			rs2 := t.GetReplicaSetByRevision("2")
@@ -413,5 +432,3 @@ func (s *IstioSuite) TestIstioSubsetSplitExperimentStep() {
-
-	s.TearDownSuite()
 }