diff --git a/docs/features/scaledown-aborted-rs.md b/docs/features/scaledown-aborted-rs.md new file mode 100644 index 0000000000..40740fce64 --- /dev/null +++ b/docs/features/scaledown-aborted-rs.md @@ -0,0 +1,18 @@ +# Scaledown New Replicaset on Aborted Rollout + +Upon an aborted update, we may scale down the new replicaset for all strategies. Users can then choose to leave the new replicaset scaled up indefinitely by setting abortScaleDownDelaySeconds to 0, or adjust the value to something larger (or smaller). + +The following table summarizes the behavior under combinations of rollout strategy and `abortScaleDownDelaySeconds`. Note that `abortScaleDownDelaySeconds` is not applicable to argo-rollouts v1.0. +`abortScaleDownDelaySeconds = nil` is the default, which means in v1.1 across all rollout strategies, the new replicaset +is scaled down in 30 seconds on abort by default. + +| strategy | v1.0 behavior | abortScaleDownDelaySeconds | v1.1 behavior | +|--------------------------------------------:|:-----------------------------:|:--------------------------:|:-----------------------------:| +| blue-green | does not scale down | nil | scales down after 30 seconds | +| blue-green | does not scale down | 0 | does not scale down | +| blue-green | does not scale down | N | scales down after N seconds | +| basic canary | rolling update back to stable | N/A | rolling update back to stable | +| canary w/ traffic routing | scales down immediately | nil | scales down after 30 seconds | +| canary w/ traffic routing | scales down immediately | 0 | does not scale down | +| canary w/ traffic routing | scales down immediately | N | scales down after N seconds | +| canary w/ traffic routing + setCanaryScale | does not scale down (bug) | * | should behave like canary w/ traffic routing | diff --git a/docs/features/specification.md b/docs/features/specification.md index 7099a7214d..60acd6cb21 100644 --- a/docs/features/specification.md +++ b/docs/features/specification.md @@ -115,6 +115,10 @@ spec: # down. Defaults to nil scaleDownDelayRevisionLimit: 2 + # Add a delay in second before scaling down the preview replicaset + # if update is aborted. 0 means not to scale down. Default is 30 second + abortScaleDownDelaySeconds: 30 + # Anti Affinity configuration between desired and previous ReplicaSet. # Only one must be specified antiAffinity: @@ -295,6 +299,11 @@ spec: rootService: root-svc # optional trafficSplitName: rollout-example-traffic-split # optional + # Add a delay in second before scaling down the canary pods when update + # is aborted for canary strategy with traffic routing (not applicable for basic canary). + # 0 means canary pods are not scaled down. Default is 30 seconds. + abortScaleDownDelaySeconds: 30 + status: pauseConditions: - reason: StepPause diff --git a/manifests/crds/rollout-crd.yaml b/manifests/crds/rollout-crd.yaml index 582a80b5c6..a54aaa4891 100644 --- a/manifests/crds/rollout-crd.yaml +++ b/manifests/crds/rollout-crd.yaml @@ -91,6 +91,9 @@ spec: properties: blueGreen: properties: + abortScaleDownDelaySeconds: + format: int32 + type: integer activeMetadata: properties: annotations: @@ -224,6 +227,9 @@ spec: type: object canary: properties: + abortScaleDownDelaySeconds: + format: int32 + type: integer analysis: properties: args: diff --git a/manifests/install.yaml b/manifests/install.yaml index bb04a3e55a..8df0122b3c 100644 --- a/manifests/install.yaml +++ b/manifests/install.yaml @@ -9780,6 +9780,9 @@ spec: properties: blueGreen: properties: + abortScaleDownDelaySeconds: + format: int32 + type: integer activeMetadata: properties: annotations: @@ -9913,6 +9916,9 @@ spec: type: object canary: properties: + abortScaleDownDelaySeconds: + format: int32 + type: integer analysis: properties: args: diff --git a/manifests/namespace-install.yaml b/manifests/namespace-install.yaml index 6c88b4e6f9..5b0e730504 100644 --- a/manifests/namespace-install.yaml +++ b/manifests/namespace-install.yaml @@ -9780,6 +9780,9 @@ spec: properties: blueGreen: properties: + abortScaleDownDelaySeconds: + format: int32 + type: integer activeMetadata: properties: annotations: @@ -9913,6 +9916,9 @@ spec: type: object canary: properties: + abortScaleDownDelaySeconds: + format: int32 + type: integer analysis: properties: args: diff --git a/pkg/apiclient/rollout/rollout.swagger.json b/pkg/apiclient/rollout/rollout.swagger.json index a1be33d195..9e20548068 100644 --- a/pkg/apiclient/rollout/rollout.swagger.json +++ b/pkg/apiclient/rollout/rollout.swagger.json @@ -662,6 +662,11 @@ "activeMetadata": { "$ref": "#/definitions/github.com.argoproj.argo_rollouts.pkg.apis.rollouts.v1alpha1.PodTemplateMetadata", "title": "ActiveMetadata specify labels and annotations which will be attached to the active pods for\nthe duration which they act as a active pod, and will be removed after" + }, + "abortScaleDownDelaySeconds": { + "type": "integer", + "format": "int32", + "title": "AbortScaleDownDelaySeconds adds a delay in second before scaling down the preview replicaset\nif update is aborted. 0 means not to scale down.\nDefault is 30 second\n+optional" } }, "title": "BlueGreenStrategy defines parameters for Blue Green deployment" @@ -766,6 +771,11 @@ "type": "integer", "format": "int32", "title": "ScaleDownDelayRevisionLimit limits the number of old RS that can run at one time before getting scaled down\n+optional" + }, + "abortScaleDownDelaySeconds": { + "type": "integer", + "format": "int32", + "title": "AbortScaleDownDelaySeconds adds a delay in second before scaling down the canary pods when update\nis aborted for canary strategy with traffic routing (not applicable for basic canary).\n0 means canary pods are not scaled down.\nDefault is 30 seconds.\n+optional" } }, "title": "CanaryStrategy defines parameters for a Replica Based Canary" diff --git a/pkg/apis/rollouts/v1alpha1/types.go b/pkg/apis/rollouts/v1alpha1/types.go index b84a62c0ee..589fdc821d 100644 --- a/pkg/apis/rollouts/v1alpha1/types.go +++ b/pkg/apis/rollouts/v1alpha1/types.go @@ -202,6 +202,11 @@ type BlueGreenStrategy struct { // ActiveMetadata specify labels and annotations which will be attached to the active pods for // the duration which they act as a active pod, and will be removed after ActiveMetadata *PodTemplateMetadata `json:"activeMetadata,omitempty" protobuf:"bytes,13,opt,name=activeMetadata"` + // AbortScaleDownDelaySeconds adds a delay in second before scaling down the preview replicaset + // if update is aborted. 0 means not to scale down. + // Default is 30 second + // +optional + AbortScaleDownDelaySeconds *int32 `json:"abortScaleDownDelaySeconds,omitempty" protobuf:"varint,14,opt,name=abortScaleDownDelaySeconds"` } // AntiAffinity defines which inter-pod scheduling rule to use for anti-affinity injection @@ -282,6 +287,12 @@ type CanaryStrategy struct { // ScaleDownDelayRevisionLimit limits the number of old RS that can run at one time before getting scaled down // +optional ScaleDownDelayRevisionLimit *int32 `json:"scaleDownDelayRevisionLimit,omitempty" protobuf:"varint,12,opt,name=scaleDownDelayRevisionLimit"` + // AbortScaleDownDelaySeconds adds a delay in second before scaling down the canary pods when update + // is aborted for canary strategy with traffic routing (not applicable for basic canary). + // 0 means canary pods are not scaled down. + // Default is 30 seconds. + // +optional + AbortScaleDownDelaySeconds *int32 `json:"abortScaleDownDelaySeconds,omitempty" protobuf:"varint,13,opt,name=abortScaleDownDelaySeconds"` } // ALBTrafficRouting configuration for ALB ingress controller to control traffic routing diff --git a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go index ef2cd8be0c..f064a2e228 100644 --- a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go @@ -449,6 +449,11 @@ func (in *BlueGreenStrategy) DeepCopyInto(out *BlueGreenStrategy) { *out = new(PodTemplateMetadata) (*in).DeepCopyInto(*out) } + if in.AbortScaleDownDelaySeconds != nil { + in, out := &in.AbortScaleDownDelaySeconds, &out.AbortScaleDownDelaySeconds + *out = new(int32) + **out = **in + } return } @@ -584,6 +589,11 @@ func (in *CanaryStrategy) DeepCopyInto(out *CanaryStrategy) { *out = new(int32) **out = **in } + if in.AbortScaleDownDelaySeconds != nil { + in, out := &in.AbortScaleDownDelaySeconds, &out.AbortScaleDownDelaySeconds + *out = new(int32) + **out = **in + } return } diff --git a/rollout/bluegreen_test.go b/rollout/bluegreen_test.go index ad37c8c073..500343a521 100644 --- a/rollout/bluegreen_test.go +++ b/rollout/bluegreen_test.go @@ -26,9 +26,11 @@ var ( func newBlueGreenRollout(name string, replicas int, revisionHistoryLimit *int32, activeSvc string, previewSvc string) *v1alpha1.Rollout { rollout := newRollout(name, replicas, revisionHistoryLimit, map[string]string{"foo": "bar"}) + abortScaleDownDelaySeconds := int32(0) rollout.Spec.Strategy.BlueGreen = &v1alpha1.BlueGreenStrategy{ - ActiveService: activeSvc, - PreviewService: previewSvc, + ActiveService: activeSvc, + PreviewService: previewSvc, + AbortScaleDownDelaySeconds: &abortScaleDownDelaySeconds, } rollout.Status.CurrentStepHash = conditions.ComputeStepHash(rollout) rollout.Status.CurrentPodHash = controller.ComputeHash(&rollout.Spec.Template, rollout.Status.CollisionCount) diff --git a/rollout/replicaset.go b/rollout/replicaset.go index d3d30f0718..6adad2cf47 100644 --- a/rollout/replicaset.go +++ b/rollout/replicaset.go @@ -39,13 +39,12 @@ func (c *rolloutContext) removeScaleDownDelay(rs *appsv1.ReplicaSet) error { } // addScaleDownDelay injects the `scale-down-deadline` annotation to the ReplicaSet, or if -// scaleDownDelaySeconds is zero, removes it if it exists -func (c *rolloutContext) addScaleDownDelay(rs *appsv1.ReplicaSet) error { +// scaleDownDelaySeconds is zero, removes the annotation if it exists +func (c *rolloutContext) addScaleDownDelay(rs *appsv1.ReplicaSet, scaleDownDelaySeconds time.Duration) error { if rs == nil { return nil } ctx := context.TODO() - scaleDownDelaySeconds := time.Duration(defaults.GetScaleDownDelaySecondsOrDefault(c.rollout)) if scaleDownDelaySeconds == 0 { // If scaledown deadline is zero, it means we need to remove any replicasets with the delay // This might happen if we switch from canary with traffic routing to basic canary @@ -91,11 +90,11 @@ func (c *Controller) getReplicaSetsForRollouts(r *v1alpha1.Rollout) ([]*appsv1.R return cm.ClaimReplicaSets(rsList) } -// removeScaleDownDelays removes the scale-down-deadline annotation from the new/stable ReplicaSets, +// removeScaleDownDeadlines removes the scale-down-deadline annotation from the new/stable ReplicaSets, // in the event that we moved back to an older revision that is still within its scaleDownDelay. func (c *rolloutContext) removeScaleDownDeadlines() error { var toRemove []*appsv1.ReplicaSet - if c.newRS != nil { + if c.newRS != nil && !c.isScaleDownOnabort() { toRemove = append(toRemove, c.newRS) } if c.stableRS != nil { @@ -120,10 +119,52 @@ func (c *rolloutContext) reconcileNewReplicaSet() (bool, error) { if err != nil { return false, err } + + if c.isScaleDownOnabort() { + c.log.Infof("Scale down new rs '%s' on abort", c.newRS.Name) + + // if the newRS has scale down annotation, check if it should be scaled down now + if scaleDownAtStr, ok := c.newRS.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey]; ok { + c.log.Infof("New rs '%s' has scaledown deadline annotation: %s", c.newRS.Name, scaleDownAtStr) + scaleDownAtTime, err := time.Parse(time.RFC3339, scaleDownAtStr) + if err != nil { + c.log.Warnf("Unable to read scaleDownAt label on rs '%s'", c.newRS.Name) + } else { + now := metav1.Now() + scaleDownAt := metav1.NewTime(scaleDownAtTime) + if scaleDownAt.After(now.Time) { + c.log.Infof("RS '%s' has not reached the scaleDownTime", c.newRS.Name) + remainingTime := scaleDownAt.Sub(now.Time) + if remainingTime < c.resyncPeriod { + c.enqueueRolloutAfter(c.rollout, remainingTime) + return false, nil + } + } else { + c.log.Infof("RS '%s' has reached the scaleDownTime", c.newRS.Name) + newReplicasCount = int32(0) + } + } + } else { + abortScaleDownDelaySeconds := time.Duration(defaults.GetAbortScaleDownDelaySecondsOrDefault(c.rollout)) + err = c.addScaleDownDelay(c.newRS, abortScaleDownDelaySeconds) + if err != nil { + return false, err + } + } + } + scaled, _, err := c.scaleReplicaSetAndRecordEvent(c.newRS, newReplicasCount) return scaled, err } +func (c *rolloutContext) isScaleDownOnabort() bool { + abortScaleDownDelaySeconds := time.Duration(defaults.GetAbortScaleDownDelaySecondsOrDefault(c.rollout)) + if c.pauseContext != nil && c.pauseContext.IsAborted() && abortScaleDownDelaySeconds > 0 { + return true + } + return false +} + // reconcileOtherReplicaSets reconciles "other" ReplicaSets. // Other ReplicaSets are ReplicaSets are neither the new or stable (allRSs - newRS - stableRS) func (c *rolloutContext) reconcileOtherReplicaSets() (bool, error) { diff --git a/rollout/replicaset_test.go b/rollout/replicaset_test.go index 1490aa1b91..83c7370c83 100644 --- a/rollout/replicaset_test.go +++ b/rollout/replicaset_test.go @@ -121,12 +121,16 @@ func TestGetReplicaSetsForRollouts(t *testing.T) { func TestReconcileNewReplicaSet(t *testing.T) { tests := []struct { - name string - rolloutReplicas int - newReplicas int - scaleExpected bool - expectedNewReplicas int + name string + rolloutReplicas int + newReplicas int + scaleExpected bool + abortScaleDownDelaySeconds int32 + abortScaleDownAnnotated bool + abortScaleDownDelayPassed bool + expectedNewReplicas int }{ + { name: "New Replica Set matches rollout replica: No scale", rolloutReplicas: 10, @@ -147,6 +151,38 @@ func TestReconcileNewReplicaSet(t *testing.T) { scaleExpected: true, expectedNewReplicas: 10, }, + + { + name: "New Replica scaled down to 0: scale down on abort - deadline passed", + rolloutReplicas: 10, + newReplicas: 10, + // ScaleDownOnAbort: true, + abortScaleDownDelaySeconds: 5, + abortScaleDownAnnotated: true, + abortScaleDownDelayPassed: true, + scaleExpected: true, + expectedNewReplicas: 0, + }, + { + name: "New Replica scaled down to 0: scale down on abort - deadline not passed", + rolloutReplicas: 10, + newReplicas: 8, + // ScaleDownOnAbort: true, + abortScaleDownDelaySeconds: 5, + abortScaleDownAnnotated: true, + abortScaleDownDelayPassed: false, + scaleExpected: false, + expectedNewReplicas: 0, + }, + { + name: "New Replica scaled down to 0: scale down on abort - add annotation", + rolloutReplicas: 10, + newReplicas: 10, + abortScaleDownDelaySeconds: 5, + abortScaleDownAnnotated: false, + scaleExpected: false, + expectedNewReplicas: 0, + }, } for i := range tests { test := tests[i] @@ -164,8 +200,33 @@ func TestReconcileNewReplicaSet(t *testing.T) { argoprojclientset: &fake, kubeclientset: &k8sfake, recorder: record.NewFakeEventRecorder(), + resyncPeriod: 30 * time.Second, }, } + roCtx.enqueueRolloutAfter = func(obj interface{}, duration time.Duration) {} + if test.abortScaleDownDelaySeconds > 0 { + roCtx.pauseContext = &pauseContext{ + rollout: rollout, + } + rollout.Status.Abort = true + // rollout.Spec.ScaleDownOnAbort = true + rollout.Spec.Strategy = v1alpha1.RolloutStrategy{ + BlueGreen: &v1alpha1.BlueGreenStrategy{ + AbortScaleDownDelaySeconds: &test.abortScaleDownDelaySeconds, + }, + } + + if test.abortScaleDownAnnotated { + var deadline string + if test.abortScaleDownDelayPassed { + deadline = metav1.Now().Add(-time.Duration(test.abortScaleDownDelaySeconds) * time.Second).UTC().Format(time.RFC3339) + } else { + deadline = metav1.Now().Add(time.Duration(test.abortScaleDownDelaySeconds) * time.Second).UTC().Format(time.RFC3339) + } + roCtx.newRS.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey] = deadline + } + } + scaled, err := roCtx.reconcileNewReplicaSet() if err != nil { t.Errorf("unexpected error: %v", err) diff --git a/rollout/sync.go b/rollout/sync.go index 1ad02a35ba..f8edddcd51 100644 --- a/rollout/sync.go +++ b/rollout/sync.go @@ -390,7 +390,7 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, oldScale := defaults.GetReplicasOrDefault(rs.Spec.Replicas) *(rsCopy.Spec.Replicas) = newScale annotations.SetReplicasAnnotations(rsCopy, rolloutReplicas) - if fullScaleDown { + if fullScaleDown && !c.isScaleDownOnabort() { delete(rsCopy.Annotations, v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey) } rs, err = c.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) @@ -927,7 +927,8 @@ func (c *rolloutContext) promoteStable(newStatus *v1alpha1.RolloutStatus, reason // Now that we've marked the desired RS as stable, start the scale-down countdown on the previous stable RS previousStableRS, _ := replicasetutil.GetReplicaSetByTemplateHash(c.olderRSs, previousStableHash) if replicasetutil.GetReplicaCountForReplicaSets([]*appsv1.ReplicaSet{previousStableRS}) > 0 { - err := c.addScaleDownDelay(previousStableRS) + scaleDownDelaySeconds := time.Duration(defaults.GetScaleDownDelaySecondsOrDefault(c.rollout)) + err := c.addScaleDownDelay(previousStableRS, scaleDownDelaySeconds) if err != nil { return err } diff --git a/test/e2e/analysis_test.go b/test/e2e/analysis_test.go index 42e3e2135e..317977c2cb 100644 --- a/test/e2e/analysis_test.go +++ b/test/e2e/analysis_test.go @@ -195,6 +195,7 @@ spec: replicas: 2 strategy: blueGreen: + abortScaleDownDelaySeconds: 0 activeService: pre-promotion-fail-active previewService: pre-promotion-fail-preview previewReplicaCount: 1 @@ -402,7 +403,7 @@ spec: WaitForRolloutStatus("Paused"). Then(). ExpectRevisionPodCount("1", 1). - ExpectRevisionPodCount("2", 0). + ExpectRevisionScaleDown("2", true). ExpectRevisionPodCount("3", 1). ExpectActiveRevision("1"). ExpectPreviewRevision("3"). @@ -411,7 +412,8 @@ spec: WaitForRolloutStatus("Healthy"). Then(). ExpectRevisionPodCount("1", 1). - ExpectRevisionPodCount("2", 0). + ExpectRevisionScaleDown("1", true). + ExpectRevisionScaleDown("2", true). ExpectRevisionPodCount("3", 1). ExpectActiveRevision("3"). ExpectPreviewRevision("3"). diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index 9c0158f4e3..acc0f0d600 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -473,3 +473,31 @@ spec: assert.NotEmpty(s.T(), rs3.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey]) }) } + +// TestCanaryScaleDownOnAbort verifies scaledownOnAbort feature for canary +func (s *CanarySuite) TestCanaryScaleDownOnAbort() { + s.Given(). + HealthyRollout(`@functional/canary-scaledownonabort.yaml`). + When(). + UpdateSpec(). // update to revision 2 + WaitForRolloutStatus("Paused"). + AbortRollout(). + WaitForRolloutStatus("Degraded"). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 0) +} + +func (s *CanarySuite) TestCanaryUnScaleDownOnAbort() { + s.Given(). + HealthyRollout(`@functional/canary-unscaledownonabort.yaml`). + When(). + UpdateSpec(). // update to revision 2 + WaitForRolloutStatus("Paused"). + AbortRollout(). + WaitForRolloutStatus("Degraded"). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 1). + ExpectRevisionScaleDown("2", false) +} diff --git a/test/e2e/functional/canary-scaledownonabort.yaml b/test/e2e/functional/canary-scaledownonabort.yaml new file mode 100644 index 0000000000..8d5d05aff3 --- /dev/null +++ b/test/e2e/functional/canary-scaledownonabort.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-root +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-canary +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-stable +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: networking.k8s.io/v1beta1 +kind: Ingress +metadata: + name: canary-scaledowndelay-ingress + annotations: + kubernetes.io/ingress.class: alb +spec: + rules: + - http: + paths: + - path: /* + backend: + serviceName: canary-scaledowndelay-root + servicePort: use-annotation +--- +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: canary-scaledownd-on-abort +spec: + selector: + matchLabels: + app: canary-scaledowndelay + template: + metadata: + labels: + app: canary-scaledowndelay + spec: + containers: + - name: canary-scaledowndelay + image: nginx:1.19-alpine + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: + requests: + memory: 16Mi + cpu: 5m + strategy: + canary: + abortScaleDownDelaySeconds: 1 + scaleDownDelayRevisionLimit: 1 + canaryService: canary-scaledowndelay-canary + stableService: canary-scaledowndelay-stable + steps: + - setCanaryScale: + replicas: 1 + - pause: {} + trafficRouting: + alb: + ingress: canary-scaledowndelay-ingress + rootService: canary-scaledowndelay-root + servicePort: 80 \ No newline at end of file diff --git a/test/e2e/functional/canary-unscaledownonabort.yaml b/test/e2e/functional/canary-unscaledownonabort.yaml new file mode 100644 index 0000000000..cf9ce26438 --- /dev/null +++ b/test/e2e/functional/canary-unscaledownonabort.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-root +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-canary +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-stable +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: networking.k8s.io/v1beta1 +kind: Ingress +metadata: + name: canary-scaledowndelay-ingress + annotations: + kubernetes.io/ingress.class: alb +spec: + rules: + - http: + paths: + - path: /* + backend: + serviceName: canary-scaledowndelay-root + servicePort: use-annotation +--- +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: canary-scaledownd-on-abort +spec: + selector: + matchLabels: + app: canary-scaledowndelay + template: + metadata: + labels: + app: canary-scaledowndelay + spec: + containers: + - name: canary-scaledowndelay + image: nginx:1.19-alpine + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: + requests: + memory: 16Mi + cpu: 5m + strategy: + canary: + abortScaleDownDelaySeconds: 0 + scaleDownDelayRevisionLimit: 1 + canaryService: canary-scaledowndelay-canary + stableService: canary-scaledowndelay-stable + steps: + - setCanaryScale: + replicas: 1 + - pause: {} + trafficRouting: + alb: + ingress: canary-scaledowndelay-ingress + rootService: canary-scaledowndelay-root + servicePort: 80 \ No newline at end of file diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index 12bd138f4e..f038fa0fce 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -936,6 +936,58 @@ spec: ExpectReplicaCounts(1, 2, 1, 1, 1) } +// TestBlueGreenScaleDownOnAbort verifies the scaleDownOnAbort feature +func (s *FunctionalSuite) TestBlueGreenScaleDownOnAbort() { + s.Given(). + RolloutObjects(newService("bluegreen-preview-replicas-active")). + RolloutObjects(newService("bluegreen-preview-replicas-preview")). + RolloutObjects(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: bluegreen-scaledown-on-abort +spec: + replicas: 2 + strategy: + blueGreen: + abortScaleDownDelaySeconds: 1 + activeService: bluegreen-preview-replicas-active + previewService: bluegreen-preview-replicas-preview + previewReplicaCount: 1 + scaleDownDelaySeconds: 5 + autoPromotionEnabled: false + selector: + matchLabels: + app: bluegreen-preview-replicas + template: + metadata: + labels: + app: bluegreen-preview-replicas + spec: + containers: + - name: bluegreen-preview-replicas + image: nginx:1.19-alpine + resources: + requests: + memory: 16Mi + cpu: 1m +`). + When(). + ApplyManifests(). + WaitForRolloutStatus("Healthy"). + UpdateSpec(). + WaitForRolloutStatus("Paused"). + Then(). + ExpectRevisionPodCount("2", 1). + ExpectRevisionPodCount("1", 2). + When(). + AbortRollout(). + WaitForRolloutStatus("Degraded"). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 0) +} + func (s *FunctionalSuite) TestKubectlWaitForPaused() { s.Given(). HealthyRollout(` diff --git a/test/fixtures/then.go b/test/fixtures/then.go index 7e51dbb010..badec1b09e 100644 --- a/test/fixtures/then.go +++ b/test/fixtures/then.go @@ -9,6 +9,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/utils/defaults" replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset" @@ -73,6 +74,7 @@ func (t *Then) ExpectReplicaCounts(desired, current, updated, ready, available i } type PodExpectation func(*corev1.PodList) bool +type ReplicasetExpectation func(*appsv1.ReplicaSet) bool func (t *Then) ExpectPods(expectation string, expectFunc PodExpectation) *Then { t.t.Helper() @@ -115,6 +117,44 @@ func (t *Then) ExpectRevisionPodCount(revision string, expectedCount int) *Then return t.expectPodCountByHash(description, hash, expectedCount) } +func (t *Then) ExpectRevisionScaleDown(revision string, expectScaleDown bool) *Then { + t.t.Helper() + rs := t.GetReplicaSetByRevision(revision) + description := fmt.Sprintf("revision:%s", revision) + return t.expectRSScaleDownByName(description, rs.Name, expectScaleDown) +} + +func (t *Then) expectRSScaleDownByName(description, name string, expectScaleDown bool) *Then { + return t.ExpectRS(fmt.Sprintf("RS %s scale down", name), name, func(rs *appsv1.ReplicaSet) bool { + hasScaleDownDelay := false + + if _, ok := rs.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey]; ok { + hasScaleDownDelay = true + } + + metExpectation := hasScaleDownDelay == expectScaleDown + if !metExpectation { + t.log.Warnf("unexpected %s (rs %s): expected to be scaled down is %v", description, name, expectScaleDown) + } + return metExpectation + }) +} + +func (t *Then) ExpectRS(expectation string, name string, expectFunc ReplicasetExpectation) *Then { + t.t.Helper() + _, err := metav1.LabelSelectorAsSelector(t.Rollout().Spec.Selector) + t.CheckError(err) + rs, err := t.kubeClient.AppsV1().ReplicaSets(t.namespace).Get(t.Context, name, metav1.GetOptions{}) + // List(t.Context, metav1.ListOptions{LabelSelector: selector.String()}) + t.CheckError(err) + if !expectFunc(rs) { + t.log.Errorf("rs expectation '%s' failed", expectation) + t.t.FailNow() + } + t.log.Infof("rs expectation '%s' met", expectation) + return t +} + func (t *Then) expectPodCountByHash(description, hash string, expectedCount int) *Then { return t.ExpectPods(fmt.Sprintf("%s pod count == %d", description, expectedCount), func(pods *corev1.PodList) bool { count := 0 diff --git a/utils/defaults/defaults.go b/utils/defaults/defaults.go index 68ea380f93..f4cf6bf336 100644 --- a/utils/defaults/defaults.go +++ b/utils/defaults/defaults.go @@ -23,6 +23,8 @@ const ( DefaultProgressDeadlineSeconds = int32(600) // DefaultScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services DefaultScaleDownDelaySeconds = int32(30) + // DefaultAbortScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services + DefaultAbortScaleDownDelaySeconds = int32(30) // DefaultAutoPromotionEnabled default value for auto promoting a blueGreen strategy DefaultAutoPromotionEnabled = true // DefaultConsecutiveErrorLimit is the default number times a metric can error in sequence before @@ -111,6 +113,24 @@ func GetScaleDownDelaySecondsOrDefault(rollout *v1alpha1.Rollout) int32 { return 0 } +func GetAbortScaleDownDelaySecondsOrDefault(rollout *v1alpha1.Rollout) int32 { + if rollout.Spec.Strategy.BlueGreen != nil { + if rollout.Spec.Strategy.BlueGreen.AbortScaleDownDelaySeconds != nil { + return *rollout.Spec.Strategy.BlueGreen.AbortScaleDownDelaySeconds + } + return DefaultAbortScaleDownDelaySeconds + } + if rollout.Spec.Strategy.Canary != nil { + if rollout.Spec.Strategy.Canary.TrafficRouting != nil { + if rollout.Spec.Strategy.Canary.AbortScaleDownDelaySeconds != nil { + return *rollout.Spec.Strategy.Canary.AbortScaleDownDelaySeconds + } + return DefaultAbortScaleDownDelaySeconds + } + } + return 0 +} + func GetAutoPromotionEnabledOrDefault(rollout *v1alpha1.Rollout) bool { if rollout.Spec.Strategy.BlueGreen == nil { return DefaultAutoPromotionEnabled diff --git a/utils/defaults/defaults_test.go b/utils/defaults/defaults_test.go index 5e55c6aa34..c1d25aef7e 100644 --- a/utils/defaults/defaults_test.go +++ b/utils/defaults/defaults_test.go @@ -165,6 +165,62 @@ func TestGetScaleDownDelaySecondsOrDefault(t *testing.T) { } } +func TestGetAbortScaleDownDelaySecondsOrDefault(t *testing.T) { + { + abortScaleDownDelaySeconds := int32(60) + blueGreenNonDefaultValue := &v1alpha1.Rollout{ + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + BlueGreen: &v1alpha1.BlueGreenStrategy{ + AbortScaleDownDelaySeconds: &abortScaleDownDelaySeconds, + }, + }, + }, + } + assert.Equal(t, abortScaleDownDelaySeconds, GetAbortScaleDownDelaySecondsOrDefault(blueGreenNonDefaultValue)) + } + { + blueGreenDefaultValue := &v1alpha1.Rollout{ + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + BlueGreen: &v1alpha1.BlueGreenStrategy{}, + }, + }, + } + assert.Equal(t, DefaultAbortScaleDownDelaySeconds, GetAbortScaleDownDelaySecondsOrDefault(blueGreenDefaultValue)) + } + { + abortScaleDownDelaySeconds := int32(60) + canaryNonDefaultValue := &v1alpha1.Rollout{ + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + Canary: &v1alpha1.CanaryStrategy{ + AbortScaleDownDelaySeconds: &abortScaleDownDelaySeconds, + TrafficRouting: &v1alpha1.RolloutTrafficRouting{}, + }, + }, + }, + } + assert.Equal(t, abortScaleDownDelaySeconds, GetAbortScaleDownDelaySecondsOrDefault(canaryNonDefaultValue)) + } + { + rolloutNoStrategyDefaultValue := &v1alpha1.Rollout{} + assert.Equal(t, int32(0), GetAbortScaleDownDelaySecondsOrDefault(rolloutNoStrategyDefaultValue)) + } + { + canaryDefaultValue := &v1alpha1.Rollout{ + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + Canary: &v1alpha1.CanaryStrategy{ + TrafficRouting: &v1alpha1.RolloutTrafficRouting{}, + }, + }, + }, + } + assert.Equal(t, DefaultAbortScaleDownDelaySeconds, GetAbortScaleDownDelaySecondsOrDefault(canaryDefaultValue)) + } +} + func TestGetAutoPromotionEnabledOrDefault(t *testing.T) { autoPromote := false rolloutNonDefaultValue := &v1alpha1.Rollout{