From 549fc146bc38253789b18e749c3ce7ad444ed7d0 Mon Sep 17 00:00:00 2001 From: Hui Kang Date: Fri, 16 Jul 2021 00:21:32 -0400 Subject: [PATCH] Set default DefaultAbortScaleDownDelaySeconds to 30s - Fix bg e2e: scaledown aborted pod in 30sec; so check the delay annotation - canary e2e test: canay pod is kept running when abortScaleDownDelaySeconds=0 Signed-off-by: Hui Kang --- docs/features/scaledown-aborted-rs.md | 6 +- docs/features/specification.md | 12 +-- pkg/apiclient/rollout/rollout.swagger.json | 4 +- pkg/apis/rollouts/v1alpha1/types.go | 10 +- rollout/bluegreen_test.go | 6 +- rollout/replicaset.go | 2 +- test/e2e/analysis_test.go | 6 +- test/e2e/canary_test.go | 14 +++ .../functional/canary-unscaledownonabort.yaml | 96 +++++++++++++++++++ test/fixtures/then.go | 40 ++++++++ utils/defaults/defaults.go | 2 +- 11 files changed, 178 insertions(+), 20 deletions(-) create mode 100644 test/e2e/functional/canary-unscaledownonabort.yaml diff --git a/docs/features/scaledown-aborted-rs.md b/docs/features/scaledown-aborted-rs.md index 8b92b13e53..40740fce64 100644 --- a/docs/features/scaledown-aborted-rs.md +++ b/docs/features/scaledown-aborted-rs.md @@ -1,8 +1,10 @@ # Scaledown New Replicaset on Aborted Rollout -Upon an abort, we should scale down the new replicaset for all strategies. Users can then choose to leave the new replicaset scaled up indefinitely by setting abortScaleDownDelaySeconds to 0, or adjust the value to something larger (or smaller). +Upon an aborted update, we may scale down the new replicaset for all strategies. Users can then choose to leave the new replicaset scaled up indefinitely by setting abortScaleDownDelaySeconds to 0, or adjust the value to something larger (or smaller). -`abortScaleDownDelaySeconds = nil` is the default. +The following table summarizes the behavior under combinations of rollout strategy and `abortScaleDownDelaySeconds`. Note that `abortScaleDownDelaySeconds` is not applicable to argo-rollouts v1.0. +`abortScaleDownDelaySeconds = nil` is the default, which means in v1.1 across all rollout strategies, the new replicaset +is scaled down in 30 seconds on abort by default. | strategy | v1.0 behavior | abortScaleDownDelaySeconds | v1.1 behavior | |--------------------------------------------:|:-----------------------------:|:--------------------------:|:-----------------------------:| diff --git a/docs/features/specification.md b/docs/features/specification.md index 4e0d971570..60acd6cb21 100644 --- a/docs/features/specification.md +++ b/docs/features/specification.md @@ -115,8 +115,8 @@ spec: # down. Defaults to nil scaleDownDelayRevisionLimit: 2 - # Adds a delay before scaling down the preview replicaset if update is - # aborted. Default is 0, meaning preview replicaset won't be scaled down. + # Add a delay in second before scaling down the preview replicaset + # if update is aborted. 0 means not to scale down. Default is 30 second abortScaleDownDelaySeconds: 30 # Anti Affinity configuration between desired and previous ReplicaSet. @@ -299,10 +299,10 @@ spec: rootService: root-svc # optional trafficSplitName: rollout-example-traffic-split # optional - # Add a delay before scaling down the canary pods when update - # is aborted for canary strategy using replicas of setCanaryScale. - # Default is 0, meaning canary pods are not scaled down. - AbortScaleDownDelaySeconds: 30 + # Add a delay in second before scaling down the canary pods when update + # is aborted for canary strategy with traffic routing (not applicable for basic canary). + # 0 means canary pods are not scaled down. Default is 30 seconds. + abortScaleDownDelaySeconds: 30 status: pauseConditions: diff --git a/pkg/apiclient/rollout/rollout.swagger.json b/pkg/apiclient/rollout/rollout.swagger.json index d1f2cc7ac0..9e20548068 100644 --- a/pkg/apiclient/rollout/rollout.swagger.json +++ b/pkg/apiclient/rollout/rollout.swagger.json @@ -666,7 +666,7 @@ "abortScaleDownDelaySeconds": { "type": "integer", "format": "int32", - "title": "AbortScaleDownDelaySeconds adds a delay before scaling down the preview replicaset\nif update is aborted. Default is 0, meaning preview replicaset is not scaled down.\n+optional" + "title": "AbortScaleDownDelaySeconds adds a delay in second before scaling down the preview replicaset\nif update is aborted. 0 means not to scale down.\nDefault is 30 second\n+optional" } }, "title": "BlueGreenStrategy defines parameters for Blue Green deployment" @@ -775,7 +775,7 @@ "abortScaleDownDelaySeconds": { "type": "integer", "format": "int32", - "title": "AbortScaleDownDelaySeconds adds a delay before scaling down the canary pods when update\nis aborted for canary strategy with traffic routing (not applicable for basic canary).\nDefault is 0, meaning canary pods are not scaled down.\n+optional" + "title": "AbortScaleDownDelaySeconds adds a delay in second before scaling down the canary pods when update\nis aborted for canary strategy with traffic routing (not applicable for basic canary).\n0 means canary pods are not scaled down.\nDefault is 30 seconds.\n+optional" } }, "title": "CanaryStrategy defines parameters for a Replica Based Canary" diff --git a/pkg/apis/rollouts/v1alpha1/types.go b/pkg/apis/rollouts/v1alpha1/types.go index 03eb4df263..589fdc821d 100644 --- a/pkg/apis/rollouts/v1alpha1/types.go +++ b/pkg/apis/rollouts/v1alpha1/types.go @@ -202,8 +202,9 @@ type BlueGreenStrategy struct { // ActiveMetadata specify labels and annotations which will be attached to the active pods for // the duration which they act as a active pod, and will be removed after ActiveMetadata *PodTemplateMetadata `json:"activeMetadata,omitempty" protobuf:"bytes,13,opt,name=activeMetadata"` - // AbortScaleDownDelaySeconds adds a delay before scaling down the preview replicaset - // if update is aborted. Default is 0, meaning preview replicaset is not scaled down. + // AbortScaleDownDelaySeconds adds a delay in second before scaling down the preview replicaset + // if update is aborted. 0 means not to scale down. + // Default is 30 second // +optional AbortScaleDownDelaySeconds *int32 `json:"abortScaleDownDelaySeconds,omitempty" protobuf:"varint,14,opt,name=abortScaleDownDelaySeconds"` } @@ -286,9 +287,10 @@ type CanaryStrategy struct { // ScaleDownDelayRevisionLimit limits the number of old RS that can run at one time before getting scaled down // +optional ScaleDownDelayRevisionLimit *int32 `json:"scaleDownDelayRevisionLimit,omitempty" protobuf:"varint,12,opt,name=scaleDownDelayRevisionLimit"` - // AbortScaleDownDelaySeconds adds a delay before scaling down the canary pods when update + // AbortScaleDownDelaySeconds adds a delay in second before scaling down the canary pods when update // is aborted for canary strategy with traffic routing (not applicable for basic canary). - // Default is 0, meaning canary pods are not scaled down. + // 0 means canary pods are not scaled down. + // Default is 30 seconds. // +optional AbortScaleDownDelaySeconds *int32 `json:"abortScaleDownDelaySeconds,omitempty" protobuf:"varint,13,opt,name=abortScaleDownDelaySeconds"` } diff --git a/rollout/bluegreen_test.go b/rollout/bluegreen_test.go index ad37c8c073..500343a521 100644 --- a/rollout/bluegreen_test.go +++ b/rollout/bluegreen_test.go @@ -26,9 +26,11 @@ var ( func newBlueGreenRollout(name string, replicas int, revisionHistoryLimit *int32, activeSvc string, previewSvc string) *v1alpha1.Rollout { rollout := newRollout(name, replicas, revisionHistoryLimit, map[string]string{"foo": "bar"}) + abortScaleDownDelaySeconds := int32(0) rollout.Spec.Strategy.BlueGreen = &v1alpha1.BlueGreenStrategy{ - ActiveService: activeSvc, - PreviewService: previewSvc, + ActiveService: activeSvc, + PreviewService: previewSvc, + AbortScaleDownDelaySeconds: &abortScaleDownDelaySeconds, } rollout.Status.CurrentStepHash = conditions.ComputeStepHash(rollout) rollout.Status.CurrentPodHash = controller.ComputeHash(&rollout.Spec.Template, rollout.Status.CollisionCount) diff --git a/rollout/replicaset.go b/rollout/replicaset.go index 506d6be8a2..6adad2cf47 100644 --- a/rollout/replicaset.go +++ b/rollout/replicaset.go @@ -39,7 +39,7 @@ func (c *rolloutContext) removeScaleDownDelay(rs *appsv1.ReplicaSet) error { } // addScaleDownDelay injects the `scale-down-deadline` annotation to the ReplicaSet, or if -// scaleDownDelaySeconds is zero, removes it if it exists +// scaleDownDelaySeconds is zero, removes the annotation if it exists func (c *rolloutContext) addScaleDownDelay(rs *appsv1.ReplicaSet, scaleDownDelaySeconds time.Duration) error { if rs == nil { return nil diff --git a/test/e2e/analysis_test.go b/test/e2e/analysis_test.go index 42e3e2135e..317977c2cb 100644 --- a/test/e2e/analysis_test.go +++ b/test/e2e/analysis_test.go @@ -195,6 +195,7 @@ spec: replicas: 2 strategy: blueGreen: + abortScaleDownDelaySeconds: 0 activeService: pre-promotion-fail-active previewService: pre-promotion-fail-preview previewReplicaCount: 1 @@ -402,7 +403,7 @@ spec: WaitForRolloutStatus("Paused"). Then(). ExpectRevisionPodCount("1", 1). - ExpectRevisionPodCount("2", 0). + ExpectRevisionScaleDown("2", true). ExpectRevisionPodCount("3", 1). ExpectActiveRevision("1"). ExpectPreviewRevision("3"). @@ -411,7 +412,8 @@ spec: WaitForRolloutStatus("Healthy"). Then(). ExpectRevisionPodCount("1", 1). - ExpectRevisionPodCount("2", 0). + ExpectRevisionScaleDown("1", true). + ExpectRevisionScaleDown("2", true). ExpectRevisionPodCount("3", 1). ExpectActiveRevision("3"). ExpectPreviewRevision("3"). diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index f3f4008c6a..acc0f0d600 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -487,3 +487,17 @@ func (s *CanarySuite) TestCanaryScaleDownOnAbort() { Then(). ExpectRevisionPodCount("2", 0) } + +func (s *CanarySuite) TestCanaryUnScaleDownOnAbort() { + s.Given(). + HealthyRollout(`@functional/canary-unscaledownonabort.yaml`). + When(). + UpdateSpec(). // update to revision 2 + WaitForRolloutStatus("Paused"). + AbortRollout(). + WaitForRolloutStatus("Degraded"). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 1). + ExpectRevisionScaleDown("2", false) +} diff --git a/test/e2e/functional/canary-unscaledownonabort.yaml b/test/e2e/functional/canary-unscaledownonabort.yaml new file mode 100644 index 0000000000..cf9ce26438 --- /dev/null +++ b/test/e2e/functional/canary-unscaledownonabort.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-root +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-canary +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-stable +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: networking.k8s.io/v1beta1 +kind: Ingress +metadata: + name: canary-scaledowndelay-ingress + annotations: + kubernetes.io/ingress.class: alb +spec: + rules: + - http: + paths: + - path: /* + backend: + serviceName: canary-scaledowndelay-root + servicePort: use-annotation +--- +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: canary-scaledownd-on-abort +spec: + selector: + matchLabels: + app: canary-scaledowndelay + template: + metadata: + labels: + app: canary-scaledowndelay + spec: + containers: + - name: canary-scaledowndelay + image: nginx:1.19-alpine + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: + requests: + memory: 16Mi + cpu: 5m + strategy: + canary: + abortScaleDownDelaySeconds: 0 + scaleDownDelayRevisionLimit: 1 + canaryService: canary-scaledowndelay-canary + stableService: canary-scaledowndelay-stable + steps: + - setCanaryScale: + replicas: 1 + - pause: {} + trafficRouting: + alb: + ingress: canary-scaledowndelay-ingress + rootService: canary-scaledowndelay-root + servicePort: 80 \ No newline at end of file diff --git a/test/fixtures/then.go b/test/fixtures/then.go index 7e51dbb010..badec1b09e 100644 --- a/test/fixtures/then.go +++ b/test/fixtures/then.go @@ -9,6 +9,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/utils/defaults" replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset" @@ -73,6 +74,7 @@ func (t *Then) ExpectReplicaCounts(desired, current, updated, ready, available i } type PodExpectation func(*corev1.PodList) bool +type ReplicasetExpectation func(*appsv1.ReplicaSet) bool func (t *Then) ExpectPods(expectation string, expectFunc PodExpectation) *Then { t.t.Helper() @@ -115,6 +117,44 @@ func (t *Then) ExpectRevisionPodCount(revision string, expectedCount int) *Then return t.expectPodCountByHash(description, hash, expectedCount) } +func (t *Then) ExpectRevisionScaleDown(revision string, expectScaleDown bool) *Then { + t.t.Helper() + rs := t.GetReplicaSetByRevision(revision) + description := fmt.Sprintf("revision:%s", revision) + return t.expectRSScaleDownByName(description, rs.Name, expectScaleDown) +} + +func (t *Then) expectRSScaleDownByName(description, name string, expectScaleDown bool) *Then { + return t.ExpectRS(fmt.Sprintf("RS %s scale down", name), name, func(rs *appsv1.ReplicaSet) bool { + hasScaleDownDelay := false + + if _, ok := rs.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey]; ok { + hasScaleDownDelay = true + } + + metExpectation := hasScaleDownDelay == expectScaleDown + if !metExpectation { + t.log.Warnf("unexpected %s (rs %s): expected to be scaled down is %v", description, name, expectScaleDown) + } + return metExpectation + }) +} + +func (t *Then) ExpectRS(expectation string, name string, expectFunc ReplicasetExpectation) *Then { + t.t.Helper() + _, err := metav1.LabelSelectorAsSelector(t.Rollout().Spec.Selector) + t.CheckError(err) + rs, err := t.kubeClient.AppsV1().ReplicaSets(t.namespace).Get(t.Context, name, metav1.GetOptions{}) + // List(t.Context, metav1.ListOptions{LabelSelector: selector.String()}) + t.CheckError(err) + if !expectFunc(rs) { + t.log.Errorf("rs expectation '%s' failed", expectation) + t.t.FailNow() + } + t.log.Infof("rs expectation '%s' met", expectation) + return t +} + func (t *Then) expectPodCountByHash(description, hash string, expectedCount int) *Then { return t.ExpectPods(fmt.Sprintf("%s pod count == %d", description, expectedCount), func(pods *corev1.PodList) bool { count := 0 diff --git a/utils/defaults/defaults.go b/utils/defaults/defaults.go index a1a85c3578..f4cf6bf336 100644 --- a/utils/defaults/defaults.go +++ b/utils/defaults/defaults.go @@ -24,7 +24,7 @@ const ( // DefaultScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services DefaultScaleDownDelaySeconds = int32(30) // DefaultAbortScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services - DefaultAbortScaleDownDelaySeconds = int32(0) + DefaultAbortScaleDownDelaySeconds = int32(30) // DefaultAutoPromotionEnabled default value for auto promoting a blueGreen strategy DefaultAutoPromotionEnabled = true // DefaultConsecutiveErrorLimit is the default number times a metric can error in sequence before